# Setup ----
# Attach the packages used by this report. tidyverse supplies read_csv(),
# the dplyr verbs and ggplot2; gghighlight and knitr are used for plot
# highlighting and kable() tables.
library(tidyverse)
library(gghighlight)
library(knitr)
#> Warning: package 'knitr' was built under R version 4.4.2

# Read the data once. The path is relative to the directory the document
# is rendered from.
employee <- read_csv("employee.csv")

# The chunk above sets up the Quarto file by attaching the required
# packages and reading in the data set.
I am using the “employee.csv” file for this assignment.
# Youngest age in the data set.
employee %>% summarize(min(Age)) # minimum age
#> # A tibble: 1 × 1
#>   `min(Age)`
#>        <dbl>
#> 1         18

# Oldest age in the data set.
employee %>% summarize(max(Age)) # maximum age
#> # A tibble: 1 × 1
#>   `max(Age)`
#>        <dbl>
#> 1         60
# Bucket each employee into an age bracket. "0" is the fallback label for
# any age that matches none of the five ranges.
#
# The lower bounds must use `>=`: a bare `=` inside case_when() is parsed
# as a named argument, not as a comparison. The last range starts at 51;
# a lower bound of 41 would duplicate the "40-50" condition and only give
# the right label because case_when() stops at the first match.
#
# The labels are kept as they are because other chunks refer to them
# (e.g. "50-60"). Note that "20-30" covers ages 21-30, "30-40" covers
# 31-40, and so on.
employee <- employee %>%
  mutate(
    Age_Bracket = case_when(
      Age >= 18 & Age < 21 ~ "18-20",
      Age >= 21 & Age < 31 ~ "20-30",
      Age >= 31 & Age < 41 ~ "30-40",
      Age >= 41 & Age < 51 ~ "40-50",
      Age >= 51 & Age < 61 ~ "50-60",
      TRUE ~ "0"
    )
  )

# Head count per age bracket, with one bar per gender side by side.
ggplot(data = employee) +
  geom_bar(
    mapping = aes(x = Age_Bracket, fill = Gender),
    position = "dodge"
  ) +
  # Unnamed values are assigned in the order of the Gender levels.
  scale_fill_manual(values = c("#D50057", "#003595")) +
  ggtitle("Age by Gender") +
  xlab("Age") +
  ylab("# of people") +
  # Apply the complete theme once, before any tweaks: theme_minimal()
  # replaces every theme() setting that was added ahead of it.
  theme_minimal() +
  theme(
    panel.grid.major = element_line(color = "white"),
    axis.title.x = element_text(colour = "#60269E", face = "bold"),
    axis.title.y = element_text(colour = "#60269E", face = "bold"),
    axis.text = element_text(colour = "#666666"),
    axis.ticks = element_line(colour = "#666666"),
    axis.line = element_line(colour = "#666666"),
    title = element_text(colour = "#60269E", face = "bold"),
    # hjust = 0.5 centres the title.
    plot.title = element_text(face = "bold", hjust = 0.5)
  )

# As can be seen above, there are more males than females working at
# BioMarin Pharmaceuticals.
The 30-40 age bracket has the largest number of employees.
# Table 1: mean monthly rate and mean performance rating per gender.

# Mean monthly rate per gender, sorted from lowest to highest.
Mean_Pay_By_Gender <- employee %>%
  group_by(Gender) %>%
  summarize(mean_monthly_pay = mean(MonthlyRate)) %>%
  arrange(mean_monthly_pay)

# Mean performance rating per gender, rounded to a whole number.
Mean_Performance_By_Gender <- employee %>%
  group_by(Gender) %>%
  summarize(mean_performance_rating = round(mean(PerformanceRating))) %>%
  arrange(mean_performance_rating)

# Full outer merge on Gender so a group missing from either summary is
# still listed.
Table_1_Gender <- merge(
  Mean_Pay_By_Gender,
  Mean_Performance_By_Gender,
  by = "Gender",
  all = TRUE
)

knitr::kable(
  Table_1_Gender,
  format = "pipe",
  col.names = c("Gender", "Mean Monthly Pay", "Performance Rating"),
  align = rep("c", 3),
  caption = "Mean Pay and Performance by Gender"
)
#> | Gender | Mean Monthly Pay | Performance Rating |
#> |---|---|---|
#> | Female | 14674.60 | 3 |
#> | Male | 14072.11 | 3 |
# Table 2: mean monthly rate and mean performance rating per age bracket.

# Mean monthly rate per bracket, sorted from lowest to highest.
Mean_Pay_By_Age <- employee %>%
  group_by(Age_Bracket) %>%
  summarize(mean_monthly_pay = mean(MonthlyRate)) %>%
  arrange(mean_monthly_pay)

# Mean performance rating per bracket, rounded to two decimals.
Mean_Performance_By_Age <- employee %>%
  group_by(Age_Bracket) %>%
  summarize(
    mean_performance = round(mean(PerformanceRating), digits = 2)
  ) %>%
  arrange(mean_performance)

# Full outer merge on Age_Bracket so a bracket missing from either summary
# is still listed.
Table_2_Age <- merge(
  Mean_Pay_By_Age,
  Mean_Performance_By_Age,
  by = "Age_Bracket",
  all = TRUE
)

knitr::kable(
  Table_2_Age,
  format = "pipe",
  col.names = c("Age Bracket", "Mean Monthly Pay", "Performance Rating"),
  align = rep("c", 3),
  caption = "Mean Pay and Performance by Age Bracket"
)
#> | Age Bracket | Mean Monthly Pay | Performance Rating |
#> |---|---|---|
#> | 18-20 | 15470.54 | 3.14 |
#> | 20-30 | 14039.06 | 3.15 |
#> | 30-40 | 14273.64 | 3.15 |
#> | 40-50 | 13794.02 | 3.15 |
#> | 50-60 | 16112.22 | 3.17 |
The 50-60 age bracket earns the most on average and has a mean performance rating of 3.17.
The second-highest-earning age bracket is 18-20, despite having the lowest performance rating (3.14).
The 40-50 age bracket earns the least on average.
# Text label drawn beside the highlighted bar. y = 5 is a position on the
# discrete Age_Bracket axis (presumably the "50-60" bar -- confirm against
# the rendered plot).
geomcol_annotation <- data.frame(
  x = 3.68,
  y = 5,
  label = " highest\nperformance"
)

# Horizontal bars of mean performance per age bracket, with the "50-60"
# bracket highlighted.
ggplot(data = Mean_Performance_By_Age) +
  geom_col(
    mapping = aes(x = mean_performance, y = Age_Bracket, fill = Age_Bracket)
  ) +
  # Value label just past the end of each bar.
  geom_text(
    aes(x = mean_performance, y = Age_Bracket, label = mean_performance),
    hjust = -0.10,
    color = "#666666"
  ) +
  xlim(0, 4) +
  ggtitle("Work Performance by Age Bracket") +
  xlab("Mean Performance") +
  ylab("Age Bracket") +
  theme_minimal() +
  theme(
    panel.grid.major = element_line(color = "white"),
    axis.title.x = element_text(colour = "#60269E", face = "bold"),
    axis.title.y = element_text(colour = "#60269E", face = "bold"),
    axis.text = element_text(colour = "#666666"),
    axis.ticks = element_line(colour = "#666666"),
    axis.line = element_line(colour = "#666666"),
    title = element_text(colour = "#60269E", face = "bold"),
    plot.title = element_text(face = "bold", hjust = 0.5)
  ) +
  gghighlight(Age_Bracket == "50-60") +
  scale_fill_manual(values = c("#C017A2")) +
  # The annotation layer is added AFTER gghighlight() on purpose.
  # gghighlight() evaluates its predicate against the layers already in
  # the plot, and geomcol_annotation has no Age_Bracket column, so placing
  # this layer earlier makes the predicate fail for it.
  geom_text(
    data = geomcol_annotation,
    aes(x = x, y = y, label = label),
    color = "#C017A2",
    size = 3,
    fontface = "bold"
  )

# Warnings from the earlier render, when the annotation was layer 3 and
# sat in front of gghighlight():
#> Warning: Tried to calculate with group_by(), but the calculation failed.
#> Falling back to ungrouped filter operation...
#> Warning: Could not calculate the predicate for layer 3; ignored
# Annualise the monthly rate.
employee <- employee %>%
  mutate(YearlyRate = MonthlyRate * 12)

# Distribution of annual pay per department, one box per travel category.
ggplot(employee) +
  geom_boxplot(aes(x = Department, y = YearlyRate, fill = BusinessTravel)) +
  scale_fill_manual(values = c("#FF4438", "#C017A2", "#0197f6")) +
  ggtitle("Annual Pay by Department") +
  xlab("Department") +
  ylab("Annual Pay, in '000") +
  # Ticks are placed every 87,500 and labelled in thousands to match the
  # axis title.
  scale_y_continuous(
    limits = c(0, 350000),
    breaks = c(0, 87500, 175000, 262500, 350000),
    labels = c("0", "87.5", "175", "262.5", "350")
  ) +
  theme_minimal() +
  theme(
    panel.grid.major = element_line(color = "white"),
    axis.title.x = element_text(colour = "#60269E", face = "bold"),
    axis.title.y = element_text(colour = "#60269E", face = "bold"),
    axis.text = element_text(colour = "#666666"),
    axis.ticks = element_line(colour = "#666666"),
    axis.line = element_line(colour = "#666666"),
    title = element_text(colour = "#60269E", face = "bold"),
    plot.title = element_text(face = "bold", hjust = 0.5)
  )

# The department of sales earns the most on average, with those travelling
# rarely earning the most.
The department of Human Resources earns the least on average, with those travelling frequently earning the least.
Across all departments, employees who travel frequently tend to have lower annual pay.
# Text label for the guide arrow; x and y are data coordinates.
scatterplot_annotation <- data.frame(
  x = 10,
  y = 5,
  label = "Positive correlation"
)

# Years in current role against years at the company, one point per
# employee.
scatterplot <- ggplot(data = employee) +
  geom_point(mapping = aes(x = YearsInCurrentRole, y = YearsAtCompany)) +
  ggtitle(
    "Correlation between Time at Company and Time in Current Role (in years)"
  ) +
  xlab("Time in Current Role") +
  ylab("Time at Company") +
  xlim(0, 18) +
  # Hand-placed arrow from (0, 0) to (18, 20); it is not a fitted line.
  annotate(
    "segment",
    x = 0, xend = 18, y = 0, yend = 20,
    colour = "#C017A2", linewidth = 3, alpha = 0.6,
    arrow = arrow()
  ) +
  geom_text(
    data = scatterplot_annotation,
    aes(x = x, y = y, label = label),
    color = "#C017A2",
    size = 5,
    fontface = "bold"
  ) +
  # The complete theme goes first; a theme() call placed ahead of
  # theme_minimal() is discarded, which would leave the title uncentred.
  theme_minimal() +
  theme(
    panel.grid.major = element_line(color = "white"),
    axis.title.x = element_text(colour = "#60269E", face = "bold"),
    axis.title.y = element_text(colour = "#60269E", face = "bold"),
    axis.text = element_text(colour = "#666666"),
    axis.ticks = element_line(colour = "#666666"),
    axis.line = element_line(colour = "#666666"),
    title = element_text(colour = "#60269E", face = "bold"),
    plot.title = element_text(face = "bold", hjust = 0.5)
  )

scatterplot

# There is a positive correlation between Years spent in Current Role and
# Overall Years Spent in the Company.
This suggests that employees working in the same role for multiple years are more likely to keep working at BioMarin Pharmaceuticals for the long run.
# Fewest training sessions attended last year.
employee %>% summarize(min(TrainingTimesLastYear)) # 0
#> # A tibble: 1 × 1
#>   `min(TrainingTimesLastYear)`
#>                          <dbl>
#> 1                            0

# Most training sessions attended last year.
employee %>% summarize(max(TrainingTimesLastYear)) # 6
#> # A tibble: 1 × 1
#>   `max(TrainingTimesLastYear)`
#>                          <dbl>
#> 1                            6
# Training level for easier histogram creation.
# 0-2 sessions = "Low", 3-4 = "Medium", 5-6 = "High"; "0" is the fallback
# label for anything outside 0-6.
#
# The lower bounds must use `>=`: a bare `=` inside case_when() is parsed
# as a named argument. The ranges are written without overlap so that the
# result no longer depends on case_when() stopping at the first match.
employee <- employee %>%
  mutate(
    Training_Level = case_when(
      TrainingTimesLastYear >= 0 & TrainingTimesLastYear < 3 ~ "Low",
      TrainingTimesLastYear >= 3 & TrainingTimesLastYear < 5 ~ "Medium",
      TrainingTimesLastYear >= 5 & TrainingTimesLastYear < 7 ~ "High",
      TRUE ~ "0"
    )
  )

# Stacked histogram of job satisfaction, filled by training level.
ggplot(data = employee) +
  geom_histogram(
    mapping = aes(x = JobSatisfaction, fill = Training_Level),
    binwidth = 1,
    colour = "black"
  ) +
  # NOTE(review): the "Removed 4 rows" warning below presumably comes from
  # these limits clipping histogram bins -- confirm before relying on the
  # outermost bars.
  scale_x_continuous(limits = c(0, 5), breaks = c(0, 1, 2, 3, 4)) +
  # One named colour per level. An unnamed two-colour vector is too short
  # when all three levels occur. `breaks` fixes the legend order.
  scale_fill_manual(
    values = c(Low = "#60269E", Medium = "#C017A2", High = "#0197f6"),
    breaks = c("Low", "Medium", "High")
  ) +
  ggtitle("Correlation between Job Satisfaction and Training") +
  xlab("Job Satisfaction") +
  ylab("Count of Training") +
  # The complete theme goes first; a theme() call placed ahead of
  # theme_minimal() is discarded, which would leave the title uncentred.
  theme_minimal() +
  theme(
    panel.grid.major = element_line(color = "white"),
    axis.title.x = element_text(colour = "#60269E", face = "bold"),
    axis.title.y = element_text(colour = "#60269E", face = "bold"),
    axis.text = element_text(colour = "#666666"),
    axis.ticks = element_line(colour = "#666666"),
    axis.line = element_line(colour = "#666666"),
    title = element_text(colour = "#60269E", face = "bold"),
    plot.title = element_text(face = "bold", hjust = 0.5)
  )
#> Warning: Removed 4 rows containing missing values or values outside the scale range
#> (`geom_bar()`).
# Stacked histogram of performance rating, filled by training level.
ggplot(data = employee) +
  geom_histogram(
    mapping = aes(x = PerformanceRating, fill = Training_Level),
    binwidth = 1,
    colour = "black"
  ) +
  # NOTE(review): the "Removed 4 rows" warning below presumably comes from
  # these limits clipping histogram bins -- confirm before relying on the
  # outermost bars.
  scale_x_continuous(limits = c(0, 5), breaks = c(0, 1, 2, 3, 4)) +
  # One named colour per Training_Level value ("Low", "Medium", "High").
  # An unnamed two-colour vector is too short when all three levels occur.
  # `breaks` fixes the legend order.
  scale_fill_manual(
    values = c(Low = "#60269E", Medium = "#C017A2", High = "#0197f6"),
    breaks = c("Low", "Medium", "High")
  ) +
  ggtitle("Correlation between Job Performance and Training") +
  xlab("Performance Rating") +
  ylab("Count of Training") +
  # The complete theme goes first; a theme() call placed ahead of
  # theme_minimal() is discarded, which would leave the title uncentred.
  theme_minimal() +
  theme(
    panel.grid.major = element_line(color = "white"),
    axis.title.x = element_text(colour = "#60269E", face = "bold"),
    axis.title.y = element_text(colour = "#60269E", face = "bold"),
    axis.text = element_text(colour = "#666666"),
    axis.ticks = element_line(colour = "#666666"),
    axis.line = element_line(colour = "#666666"),
    title = element_text(colour = "#60269E", face = "bold"),
    plot.title = element_text(face = "bold", hjust = 0.5)
  )
#> Warning: Removed 4 rows containing missing values or values outside the scale range
#> (`geom_bar()`).
# Jittered points of job satisfaction per job role, coloured by attrition.
ggplot(employee) +
  geom_jitter(
    aes(
      x = JobRole,
      y = JobSatisfaction,
      fill = Attrition,
      colour = Attrition
    )
  ) +
  scale_color_manual(values = c("#D2E0ff", "#C017A2")) +
  ggtitle("Attrition by Job Role") +
  xlab("Job Role") +
  ylab("Job Satisfaction") +
  theme_minimal() +
  theme(
    # Job role names are rotated so they do not overlap.
    axis.text.x = element_text(angle = -90, vjust = 0.5, hjust = 0),
    panel.grid.major = element_line(color = "white"),
    axis.title.x = element_text(colour = "#60269E", face = "bold"),
    axis.title.y = element_text(colour = "#60269E", face = "bold"),
    axis.text = element_text(colour = "#666666"),
    axis.ticks = element_line(colour = "#666666"),
    axis.line = element_line(colour = "#666666"),
    title = element_text(colour = "#60269E", face = "bold"),
    # hjust = 0.5 puts the title in the centre.
    plot.title = element_text(face = "bold", hjust = 0.5)
  )

# Laboratory Technicians, Research Scientists, Sales Executives and Sales
# Representatives have the highest Attrition Levels.
Research Directors, Managers, Healthcare Representatives and Manufacturing Directors have the lowest Attrition Levels.
# Table 3: mean job satisfaction per job role, rounded to two decimals and
# sorted from lowest to highest.
Table_3 <- employee %>%
  group_by(JobRole) %>%
  summarize(mean_job_sat = round(mean(JobSatisfaction), digits = 2)) %>%
  arrange(mean_job_sat)

knitr::kable(
  Table_3,
  format = "pipe",
  col.names = c("Job Role", "Job Satisfaction"),
  align = c("l", "c"),
  caption = "Job Satisfaction by Job Role (lowest to highest)"
)
#> | Job Role | Job Satisfaction |
#> |---|---|
#> | Human Resources | 2.56 |
#> | Manufacturing Director | 2.68 |
#> | Laboratory Technician | 2.69 |
#> | Research Director | 2.70 |
#> | Manager | 2.71 |
#> | Sales Representative | 2.73 |
#> | Sales Executive | 2.75 |
#> | Research Scientist | 2.77 |
#> | Healthcare Representative | 2.79 |
‘Laboratory Technicians, Research Scientists, Sales Executives and Sales Representatives have the highest Attrition Levels.’
‘Research Directors, Managers, Healthcare Representatives and Manufacturing Directors have the lowest Attrition Levels.’