# Load required packages
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Set random seed for reproducibility
set.seed(123)

# ====== 1. Simulate sample data ======
# Create a long-format data frame: 4 genes × 4 compounds
df_long <- expand.grid(
  Entrez_Symbol..NCBI. = c("EGFR", "MAPK1", "CDK2", "AKT1"),
  Sample.Name = c("Cmpd_A", "Cmpd_B", "Cmpd_C", "Cmpd_D")
)
df_long 
##    Entrez_Symbol..NCBI. Sample.Name
## 1                  EGFR      Cmpd_A
## 2                 MAPK1      Cmpd_A
## 3                  CDK2      Cmpd_A
## 4                  AKT1      Cmpd_A
## 5                  EGFR      Cmpd_B
## 6                 MAPK1      Cmpd_B
## 7                  CDK2      Cmpd_B
## 8                  AKT1      Cmpd_B
## 9                  EGFR      Cmpd_C
## 10                MAPK1      Cmpd_C
## 11                 CDK2      Cmpd_C
## 12                 AKT1      Cmpd_C
## 13                 EGFR      Cmpd_D
## 14                MAPK1      Cmpd_D
## 15                 CDK2      Cmpd_D
## 16                 AKT1      Cmpd_D
# Randomly assign binary values (0 = inactive, 1 = active)
df_long$Value <- sample(c(0, 1), size = nrow(df_long), replace = TRUE)

# Simulate custom sorting labels for genes and compounds
labs <- data.frame(label = c("MAPK1", "AKT1", "CDK2", "EGFR"))         # gene order
lab_1 <- data.frame(label = c("Cmpd_C", "Cmpd_A", "Cmpd_D", "Cmpd_B")) # compound order

# ====== 2. Sorting based on defined order ======

# Sort Sample.Name (compounds) based on lab_1
df_long$Compound <- factor(df_long$Sample.Name, levels = lab_1$label)
df_sorted_compound <- df_long[order(df_long$Compound), ]
df_long$Sample.Name <- factor(df_long$Sample.Name, levels = unique(df_sorted_compound$Sample.Name))

# Sort Entrez_Symbol..NCBI. (genes) based on labs
df_long$Gene <- factor(df_long$Entrez_Symbol..NCBI., levels = labs$label)
df_sorted_gene <- df_long[order(df_long$Gene), ]
df_long$Entrez_Symbol..NCBI. <- factor(df_long$Entrez_Symbol..NCBI.,
                                       levels = unique(df_sorted_gene$Entrez_Symbol..NCBI.))

# ====== 3. Create heatmap with ggplot2 ======

p <- ggplot(df_long, aes(x = Entrez_Symbol..NCBI., y = Sample.Name, fill = factor(Value))) +
  geom_tile(color = "white", size = 0.6) +  # tile border and size
  scale_fill_manual(
    values = c("0" = "gray95", "1" = "#B3CDE3"),
    labels = c("0" = "Inactive", "1" = "Active"),
    name = "Category"
  ) +
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 12, face = "plain", family = "sans"),
    axis.text.y = element_text(hjust = 1, vjust = 0.5, size = 12, face = "plain", family = "sans"),
    axis.title = element_blank(),
    axis.ticks = element_line(color = "black", size = 0.4),
    axis.ticks.length = unit(0.1, "cm"),
    axis.ticks.x.top = element_blank(),
    axis.ticks.y.right = element_blank(),
    legend.background = element_rect(fill = NA, size = 0.1, linetype = "solid", colour = "white"),
    legend.key = element_rect(colour = NA, fill = NA),
    legend.text = element_text(colour = "black", size = 12, face = "plain", family = "sans")
  ) +
  ggtitle("Example (Top 4 Kinases x Top 4 Compounds)") +
  coord_fixed()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Display the plot
print(p)

# ====== 4. Save the plot to a TIFF file ======
dir_path <- "."  # Save to current directory

ggsave(
  filename = paste0(Sys.Date(), "-HM-1.tif"),
  plot = p,
  device = "tiff",
  path = dir_path,
  scale = 1,
  width = 17,
  height = 17,
  units = "cm",
  dpi = 300,
  limitsize = TRUE,
  compression = "lzw"
)