#Loading libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(psych)
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(viridis)
## Loading required package: viridisLite
library(scales)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:viridis':
##
## viridis_pal
##
## The following objects are masked from 'package:psych':
##
## alpha, rescale
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
library(purrr)
library(flextable)
##
## Attaching package: 'flextable'
##
## The following object is masked from 'package:purrr':
##
## compose
# Read the raw HR data set from the working directory.
HR_Analytics <- read_csv("HR_Analytics.csv")
## Rows: 1470 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): Attrition, BusinessTravel, Department, EducationField, Gender, Job...
## dbl (26): Age, DailyRate, DistanceFromHome, Education, EmployeeCount, Employ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Work on a random subset of at most 500 rows so the later steps run quickly.
# Row indices are drawn without replacement and used to subset the data; the
# fixed seed makes the same rows come out on every run.
set.seed(123)
# Cap the sample size so a file with fewer than 500 rows does not error.
sample_size <- min(500L, nrow(HR_Analytics))
# sample.int(n, size) draws from 1..n exactly like sample(1:n, size) but stays
# well-defined when n is 0 (1:0 would yield c(1, 0)).
rows.to.select <- sample.int(nrow(HR_Analytics), size = sample_size, replace = FALSE)
HR_Analytics <- HR_Analytics[rows.to.select, ]
# Summary statistics for every column of the sampled data. Character columns
# (Attrition included) only report length/class here; the split between
# attrition and no attrition is tabulated by the count() calls that follow.
summary(HR_Analytics)
## Age Attrition BusinessTravel DailyRate
## Min. :18.00 Length:500 Length:500 Min. : 102.0
## 1st Qu.:31.00 Class :character Class :character 1st Qu.: 439.8
## Median :36.00 Mode :character Mode :character Median : 787.5
## Mean :37.44 Mean : 795.5
## 3rd Qu.:44.00 3rd Qu.:1167.5
## Max. :60.00 Max. :1498.0
## Department DistanceFromHome Education EducationField
## Length:500 Min. : 1.000 Min. :1.000 Length:500
## Class :character 1st Qu.: 2.000 1st Qu.:2.000 Class :character
## Mode :character Median : 7.000 Median :3.000 Mode :character
## Mean : 9.538 Mean :2.996
## 3rd Qu.:15.000 3rd Qu.:4.000
## Max. :29.000 Max. :5.000
## EmployeeCount EmployeeNumber EnvironmentSatisfaction Gender
## Min. :1 Min. : 8.0 Min. :1.00 Length:500
## 1st Qu.:1 1st Qu.: 474.8 1st Qu.:2.00 Class :character
## Median :1 Median : 974.5 Median :3.00 Mode :character
## Mean :1 Mean :1004.6 Mean :2.76
## 3rd Qu.:1 3rd Qu.:1542.5 3rd Qu.:4.00
## Max. :1 Max. :2068.0 Max. :4.00
## HourlyRate JobInvolvement JobLevel JobRole
## Min. : 30.00 Min. :1.000 Min. :1.000 Length:500
## 1st Qu.: 49.00 1st Qu.:2.000 1st Qu.:1.000 Class :character
## Median : 65.00 Median :3.000 Median :2.000 Mode :character
## Mean : 65.71 Mean :2.752 Mean :2.158
## 3rd Qu.: 83.00 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :100.00 Max. :4.000 Max. :5.000
## JobSatisfaction MaritalStatus MonthlyIncome MonthlyRate
## Min. :1.00 Length:500 Min. : 1052 Min. : 2097
## 1st Qu.:2.00 Class :character 1st Qu.: 2898 1st Qu.: 8677
## Median :3.00 Mode :character Median : 5044 Median :14550
## Mean :2.74 Mean : 6833 Mean :14366
## 3rd Qu.:4.00 3rd Qu.: 9390 3rd Qu.:20329
## Max. :4.00 Max. :19999 Max. :26959
## NumCompaniesWorked Over18 OverTime PercentSalaryHike
## Min. :0.000 Length:500 Length:500 Min. :11.00
## 1st Qu.:1.000 Class :character Class :character 1st Qu.:12.00
## Median :2.000 Mode :character Mode :character Median :14.00
## Mean :2.862 Mean :15.33
## 3rd Qu.:4.000 3rd Qu.:18.00
## Max. :9.000 Max. :25.00
## PerformanceRating RelationshipSatisfaction StandardHours StockOptionLevel
## Min. :3.000 Min. :1.000 Min. :80 Min. :0.000
## 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:80 1st Qu.:0.000
## Median :3.000 Median :3.000 Median :80 Median :1.000
## Mean :3.162 Mean :2.754 Mean :80 Mean :0.782
## 3rd Qu.:3.000 3rd Qu.:4.000 3rd Qu.:80 3rd Qu.:1.000
## Max. :4.000 Max. :4.000 Max. :80 Max. :3.000
## TotalWorkingYears TrainingTimesLastYear WorkLifeBalance YearsAtCompany
## Min. : 0.00 Min. :0.00 Min. :1.000 Min. : 0.000
## 1st Qu.: 6.00 1st Qu.:2.00 1st Qu.:2.000 1st Qu.: 3.000
## Median :10.00 Median :3.00 Median :3.000 Median : 5.000
## Mean :12.02 Mean :2.87 Mean :2.768 Mean : 7.366
## 3rd Qu.:17.00 3rd Qu.:3.00 3rd Qu.:3.000 3rd Qu.:10.000
## Max. :40.00 Max. :6.00 Max. :4.000 Max. :40.000
## YearsInCurrentRole YearsSinceLastPromotion YearsWithCurrManager
## Min. : 0.00 Min. : 0.000 Min. : 0.000
## 1st Qu.: 2.00 1st Qu.: 0.000 1st Qu.: 2.000
## Median : 3.00 Median : 1.000 Median : 3.000
## Mean : 4.42 Mean : 2.476 Mean : 4.302
## 3rd Qu.: 7.00 3rd Qu.: 4.000 3rd Qu.: 7.000
## Max. :18.00 Max. :15.000 Max. :17.000
# Number of employees who left ("Yes") versus stayed ("No")
HR_Analytics |>
  count(Attrition)
## # A tibble: 2 × 2
## Attrition n
## <chr> <int>
## 1 No 415
## 2 Yes 85
# The same split, broken down further by gender
HR_Analytics |>
  count(Attrition, Gender)
## # A tibble: 4 × 3
## Attrition Gender n
## <chr> <chr> <int>
## 1 No Female 156
## 2 No Male 259
## 3 Yes Female 35
## 4 Yes Male 50
# Names of every column in the data set
all.var <- colnames(HR_Analytics)

# Four different representations of pay -- to be checked for redundancy
# (are they all highly correlated?)
money.var <- c("DailyRate", "HourlyRate", "MonthlyIncome", "MonthlyRate")

# Categorical (character) variables
cat.var <- c(
  "Attrition", "BusinessTravel", "Department", "EducationField", "Gender",
  "JobRole", "MaritalStatus", "Over18", "OverTime"
)

# Variables judged not helpful for the analysis
not.helpful.var <- c(
  "EmployeeCount", "EmployeeNumber", "PerformanceRating", "StandardHours"
)

# Numeric variables of interest: everything that is neither categorical nor
# flagged as not helpful
num.var <- setdiff(all.var, union(cat.var, not.helpful.var))
# Keep only the four pay-related variables
df_subset <- HR_Analytics |>
  select(DailyRate, HourlyRate, MonthlyIncome, MonthlyRate)

# Pair plot of the pay variables: correlations in the upper triangle,
# scatterplots in the lower triangle, densities on the diagonal
df_subset |>
  ggpairs(
    title = "Pair Plot for DailyRate, HourlyRate, MonthlyIncome, and MonthlyRate",
    diag = list(continuous = wrap("densityDiag")),
    lower = list(continuous = wrap("points", alpha = 0.5)),
    upper = list(continuous = wrap("cor", size = 5))
  ) +
  theme_minimal()
# The pair plot shows barely any correlation between the pay variables. For
# simplicity (and for practice purposes) MonthlyIncome is used as the sole
# indicator of income from here on.
# Table of descriptive statistics for the variables used in the analysis

# Select only the relevant columns
all.included.var <- HR_Analytics %>%
  select(JobSatisfaction, EnvironmentSatisfaction, MonthlyIncome,
         WorkLifeBalance, YearsAtCompany, YearsInCurrentRole, OverTime, Gender)

# Split into numeric and categorical columns
numeric_cols <- all.included.var %>%
  select(where(is.numeric))
categorical_cols <- all.included.var %>%
  select(OverTime, Gender)

# Min, Max, Mean and SD (rounded to 2 decimals) for each numeric variable.
# map() iterates over the columns, producing one single-row tibble per column;
# list_rbind() stacks them and writes each column's name into `Variable`.
# (The earlier version also computed `Variable = deparse(substitute(.))`, which
# never holds the column name and was only masked by `.id = "Variable"`.)
numeric_descriptives <- numeric_cols |>
  map(\(values) tibble(
    Min = round(min(values, na.rm = TRUE), 2),
    Max = round(max(values, na.rm = TRUE), 2),
    Mean = round(mean(values, na.rm = TRUE), 2),
    SD = round(sd(values, na.rm = TRUE), 2)
  )) |>
  list_rbind(names_to = "Variable")

# Create the flextable
descriptives_table <- flextable(numeric_descriptives)

# Display the table
descriptives_table
## | Variable | Min | Max | Mean | SD |
## |---|---|---|---|---|
## | JobSatisfaction | 1 | 4 | 2.74 | 1.09 |
## | EnvironmentSatisfaction | 1 | 4 | 2.76 | 1.08 |
## | MonthlyIncome | 1,052 | 19,999 | 6,832.89 | 5,008.53 |
## | WorkLifeBalance | 1 | 4 | 2.77 | 0.73 |
## | YearsAtCompany | 0 | 40 | 7.37 | 6.52 |
## | YearsInCurrentRole | 0 | 18 | 4.42 | 3.79 |
# Bar plot showing attrition by gender

# Count employees per Gender x Attrition combination, then express each count
# as a percentage of its gender. The grouping for the percentage is stated
# explicitly with `.by = Gender`, so the denominator no longer depends on the
# grouping that summarise() happens to leave behind, and the result is
# returned ungrouped.
attrition_by_gender <- HR_Analytics %>%
  count(Gender, Attrition, name = "count") %>%
  mutate(percentage = count / sum(count) * 100, .by = Gender)

# Dodged bars (one per attrition status within each gender), each labelled
# with its percentage to one decimal place
ggplot(attrition_by_gender, aes(x = Gender, y = percentage, fill = Attrition)) +
  geom_col(position = "dodge") +
  geom_text(aes(label = sprintf("%.1f%%", percentage)),
            position = position_dodge(0.9), vjust = -0.5) +
  labs(title = "Attrition by Gender (Percentage)",
       x = "Gender",
       y = "Percentage of Employees",
       fill = "Attrition") +
  theme_minimal()
# Select Attrition (the grouping variable) plus the numeric variables that are
# compared against it
df_selected <- HR_Analytics %>%
  select(Attrition, JobSatisfaction, EnvironmentSatisfaction,
         MonthlyIncome, WorkLifeBalance, YearsAtCompany, YearsInCurrentRole, Age)

# Box plot of one numeric variable split by Attrition.
#
# Arguments:
#   data   data frame containing an `Attrition` column and the column `var`
#   var    name of the numeric column to plot, given as a string
#   label  human-readable variable name, used for the y-axis and the title
#          ("<label> by Attrition")
#
# Returns the ggplot object; it is not printed by this function.
#
# Layers, in drawing order: box plot (built-in outlier points hidden because
# the jittered raw points already show every observation), jittered raw
# points, a red diamond at the group mean, and text labels for the group
# median, mean, minimum and maximum.
plot_by_attrition <- function(data, var, label) {
  # Text layer printing "<prefix>: <value>" at the height of the statistic
  # computed by `stat_fun` for each group; `...` carries the per-label
  # placement and colour settings on to stat_summary().
  label_stat <- function(stat_fun, prefix, ...) {
    stat_summary(
      fun.data = function(y) {
        value <- stat_fun(y)
        data.frame(y = value, label = paste0(prefix, ": ", round(value, 2)))
      },
      geom = "text", size = 4, ...
    )
  }

  ggplot(data, aes(x = Attrition, y = .data[[var]], fill = Attrition)) +
    geom_boxplot(outlier.shape = NA) +
    geom_jitter(width = 0.2, alpha = 0.3, color = "gray40") +
    stat_summary(fun = mean, geom = "point", shape = 18, size = 3,
                 color = "red", show.legend = FALSE) +
    label_stat(median, "Median", vjust = -1.5) +
    label_stat(mean, "Mean", vjust = 1.5, color = "red") +
    # Min/Max are the group extremes of the data, which are not necessarily
    # the whisker ends of the box plot.
    label_stat(min, "Min", vjust = 1.5, hjust = -0.2, color = "blue") +
    label_stat(max, "Max", vjust = -1.5, hjust = 1.2, color = "blue") +
    scale_fill_viridis(discrete = TRUE, option = "D") +
    labs(title = paste(label, "by Attrition"), x = "Attrition", y = label) +
    theme_minimal(base_size = 14) +
    # No legend: Attrition is already shown on the x-axis
    theme(legend.position = "none",
          plot.title = element_text(size = 16, face = "bold"),
          axis.text = element_text(size = 12),
          axis.title = element_text(size = 14))
}

# Variables to plot (names) and their display labels (values), in plot order
attrition_plot_labels <- c(
  JobSatisfaction = "Job Satisfaction",
  EnvironmentSatisfaction = "Environment Satisfaction",
  MonthlyIncome = "Monthly Income",
  WorkLifeBalance = "Work-Life Balance",
  YearsAtCompany = "Years at Company",
  YearsInCurrentRole = "Years in Current Role",
  Age = "Age"
)

# One plot per variable; print() is required because ggplot objects are not
# auto-printed inside a loop
for (var in names(attrition_plot_labels)) {
  print(plot_by_attrition(df_selected, var, attrition_plot_labels[[var]]))
}
# Load necessary libraries
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.21.1
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(dplyr)
# Numeric variables that feed the clustering and the heatmap
numeric_columns <- c(
  "Age", "MonthlyIncome", "EnvironmentSatisfaction", "JobSatisfaction",
  "WorkLifeBalance", "YearsAtCompany", "YearsInCurrentRole"
)

# Numeric part of the data, with the attrition labels kept separately as a
# plain vector in the same row order
hr_numeric_data <- HR_Analytics[numeric_columns]
attrition_data <- HR_Analytics[["Attrition"]]

# Centre and scale every numeric column; scale() returns a numeric matrix
hr_data_scaled <- scale(hr_numeric_data)
# Uncentered cosine distance between the rows of a numeric matrix.
#
# Arguments:
#   m  numeric matrix (or an object as.matrix() can turn into one); each row
#      is treated as one observation
#
# Returns a "dist" object holding 1 - cosine similarity for every pair of
# rows, so values lie in [0, 2]. Row names of `m`, if any, become the labels.
#
# Note: a row consisting only of zeros has norm 0, so its similarities are
# 0 / 0 = NaN and are returned as NaN.
uncenter.dist <- function(m) {
  m <- as.matrix(m)
  # Euclidean norm of every row
  row_norms <- sqrt(rowSums(m^2))
  # Pairwise dot products divided by the pairwise products of the norms
  similarity <- tcrossprod(m) / tcrossprod(row_norms)
  # Floating-point rounding can push a cosine marginally outside [-1, 1],
  # which would yield tiny negative distances; clamp it back into range.
  similarity <- pmin(pmax(similarity, -1), 1)
  # Convert cosine similarity to cosine distance
  as.dist(1 - similarity)
}
# Create a k-means clustering solution
set.seed(123) # Setting seed for reproducibility
k <- 3 # Number of clusters for k-means
kmeans_result <- kmeans(hr_data_scaled, centers = k)

# Reorder rows (and the matching attrition labels) by k-means cluster
ordered_indices <- order(kmeans_result$cluster)
hr_data_scaled_ordered <- hr_data_scaled[ordered_indices, , drop = FALSE]
attrition_data_ordered <- attrition_data[ordered_indices]

# Hierarchical clustering (average linkage, cosine distance) on rows and
# columns. The row tree is built from the SAME reordered matrix that is
# handed to Heatmap() below: a precomputed hclust is matched to the matrix
# rows by position, so a tree built from the un-reordered matrix would attach
# the dendrogram (and the resulting row order) to the wrong employees.
row.clus <- hclust(uncenter.dist(hr_data_scaled_ordered), method = "average")
col.clus <- hclust(uncenter.dist(t(hr_data_scaled)), method = "average")

# Colours for the attrition status
attrition_colors <- c("Yes" = "black", "No" = "white")

# Row annotation showing each employee's attrition status; it uses the same
# row order as the matrix passed to Heatmap()
attrition_annotation <- rowAnnotation(
  Attrition = attrition_data_ordered,
  col = list(Attrition = attrition_colors),
  annotation_legend_param = list(Attrition = list(title = "Attrition Status"))
)

# Heatmap of the scaled variables with the attrition annotation on the right.
# NOTE(review): with a precomputed row dendrogram the displayed row order
# follows the dendrogram, so the k-means ordering above only fixes the input
# order -- confirm whether splitting rows by k-means cluster was intended.
heatmap_ordered <- Heatmap(hr_data_scaled_ordered,
                           name = "HR Analytics Heatmap",
                           cluster_rows = row.clus,
                           cluster_columns = col.clus,
                           right_annotation = attrition_annotation)

# Draw the heatmap with the attrition row annotation
draw(heatmap_ordered)