3. Volcano Plot 1
library(ggplot2)
library(dplyr)
library(ggrepel)
# Filter significant genes
de_genes_sig <- de_genes %>%
filter(!is.na(p_val_adj)) %>%
mutate(direction = case_when(
avg_log2FC >= 1 & p_val_adj < 0.05 ~ "Upregulated",
avg_log2FC <= -1 & p_val_adj < 0.05 ~ "Downregulated",
TRUE ~ "NS"
))
# Select top 50 up and 50 downregulated genes
top_genes <- de_genes_sig %>%
filter(direction %in% c("Upregulated", "Downregulated")) %>%
group_by(direction) %>%
slice_min(order_by = p_val_adj, n = 50) %>%
ungroup()
# Volcano plot
volcano <- ggplot(de_genes_sig, aes(x = avg_log2FC, y = -log10(p_val_adj))) +
geom_point(aes(color = direction), alpha = 0.7, size = 2.5) + # Increased dot size here
scale_color_manual(values = c("Upregulated" = "#D62728", "Downregulated" = "#1F77B4", "NS" = "lightgrey")) +
geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "grey40") +
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "grey40") +
geom_text_repel(
data = top_genes,
aes(label = gene),
size = 4,
max.overlaps = Inf, # No limit on overlaps
box.padding = 0.5,
segment.size = 0.3
) +
labs(
title = "Volcano Plot: Malignant vs Normal CD4+ T cells",
x = "log2 Fold Change",
y = "-log10 Adjusted p-value",
color = "Regulation"
) +
theme_minimal(base_size = 13) +
theme(
legend.position = "right",
plot.title = element_text(hjust = 0.5)
)
# Display or save
print(volcano)

# ggsave("volcano_plot.pdf", plot = volcano, width = 8, height = 6)
Volcano Plot 2
library(ggplot2)
library(dplyr)
library(ggrepel)
# ---- Step 1: Prepare Data ----
de_genes <- de_genes %>%
mutate(
log10_pval = -log10(p_val_adj),
regulation = case_when(
avg_log2FC >= 1.5 & p_val_adj < 0.05 ~ "Upregulated",
avg_log2FC <= -1 & p_val_adj < 0.05 ~ "Downregulated",
TRUE ~ "NS"
)
)
# ---- Step 2: Top 50 Up and 50 Downregulated Genes ----
top_genes <- de_genes %>%
filter(regulation %in% c("Upregulated", "Downregulated")) %>%
group_by(regulation) %>%
slice_min(order_by = p_val_adj, n = 50) %>%
ungroup()
# ---- Step 3: Plot ----
p3 <- ggplot(de_genes, aes(x = avg_log2FC, y = log10_pval)) +
geom_point(aes(color = regulation), size = 3, alpha = 0.7) +
scale_color_manual(values = c("Upregulated" = "#D62728", "Downregulated" = "#1F77B4", "NS" = "lightgrey")) +
geom_vline(xintercept = c(-1, 1.5), linetype = "dashed", color = "grey50") +
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "grey50") +
geom_label_repel(
data = top_genes,
mapping = aes(label = gene),
size = 3.5, # text size
max.overlaps = 100,
box.padding = 0.3,
segment.size = 0.3,
min.segment.length = 0
) +
labs(
title = "Volcano Plot: Malignant vs Normal CD4+ T cells",
x = "log2 Fold Change",
y = "-log10 Adjusted p-value",
color = "Regulation"
) +
theme_minimal(base_size = 14) +
theme(
plot.title = element_text(hjust = 0.5),
legend.position = "right"
)
# ---- Step 4: Display Plot ----
print(p3)

Volcano Plot 3
# Step 1: Store Your Gene Lists in R
# Upregulated genes
up_genes <- c("MYL6B", "CENPF", "CDT1", "CRNDE", "CHEK1", "TPX2", "UBE2T", "CDCA8", "CDKN3", "PCLAF",
"ASF1B", "MCM2", "MYBL2", "PKMYT1", "SLC25A5", "NUSAP1", "CENPW", "AHCY", "TTF2", "COX5A",
"FH", "CKS1B", "UCK2", "CKS2", "CCNB1", "PYCR1", "MAPKAPK3", "NCAPD2", "TRIP13", "RBBP8",
"TUBA1C", "RNASEH2A", "BRCA1", "KPNA2", "NME1", "NABP2", "GTSF1", "CDC20", "SNX10", "MAD2L1",
"YWHAH", "UBE2C", "TOP2A", "EBP", "CLIC1", "GINS2", "SLC29A1", "SRI", "ORC6", "PLK1")
# Downregulated genes
down_genes <- c("SARAF", "PCED1B-AS1", "SNHG5", "BTG1", "SRSF5", "IKZF1", "PRMT2", "RBL2", "PACS1", "MAX",
"ZBTB20", "PIK3IP1", "RAPGEF6", "IL7R", "N4BP2L2", "SF1", "RPS27", "RIPOR2", "RSRP1", "DAZAP2",
"ZFP36L1", "TXNIP", "HLA-E", "DGKA", "SON", "DDX6", "RASA3", "VAMP2", "PNRC1", "CDKN1B",
"PPP2R5C", "LEPROTL1", "EPC1", "CD247", "FOXP1", "PBXIP1", "LINC01578", "CIRBP", "ABLIM1",
"SUN2", "ZFP36", "ZFP36L2", "AL138963.4", "ETS1", "RNASET2", "HIF1A", "PNISR", "R3HDM4",
"ANKRD44", "DDX24")
#Step 2: Process DE Data and Add Regulation Category
highlight_genes <- c(up_genes, down_genes)
library(dplyr)
de_genes_processed <- de_genes %>%
filter(!is.na(p_val_adj)) %>%
mutate(
log10_pval = -log10(p_val_adj),
regulation = case_when(
avg_log2FC >= 1.5 & p_val_adj < 0.05 ~ "Upregulated",
avg_log2FC <= -1 & p_val_adj < 0.05 ~ "Downregulated",
TRUE ~ "NS"
),
highlight = ifelse(gene %in% highlight_genes, gene, NA)
)
# Step 3: Make Volcano Plot with Highlighted Genes
library(ggplot2)
library(ggrepel)
ggplot(de_genes_processed, aes(x = avg_log2FC, y = log10_pval)) +
geom_point(aes(color = regulation), alpha = 0.7, size = 2.8) +
scale_color_manual(values = c("Upregulated" = "#D62728", "Downregulated" = "#1F77B4", "NS" = "lightgrey")) +
geom_vline(xintercept = c(-1, 1.5), linetype = "dashed", color = "gray50") +
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "gray50") +
geom_label_repel(
data = filter(de_genes_processed, !is.na(highlight)),
aes(label = highlight),
size = 3.2,
box.padding = 0.3,
max.overlaps = 100,
segment.size = 0.3
) +
labs(
title = "Volcano Plot Highlighting Selected Biomarkers",
x = "log2 Fold Change",
y = "-log10 Adjusted p-value",
color = "Regulation"
) +
theme_minimal(base_size = 14) +
theme(plot.title = element_text(hjust = 0.5))

Volcano Plot 4
# Step 1: Store Your Gene Lists in R
# Upregulated genes
up_genes <- c("MYL6B", "CENPF", "CDT1", "CRNDE", "CHEK1", "TPX2", "UBE2T", "CDCA8", "CDKN3", "PCLAF",
"ASF1B", "MCM2", "MYBL2", "PKMYT1", "SLC25A5", "NUSAP1", "CENPW", "AHCY", "TTF2", "COX5A",
"FH", "CKS1B", "UCK2", "CKS2", "CCNB1", "PYCR1", "MAPKAPK3", "NCAPD2", "TRIP13", "RBBP8",
"TUBA1C", "RNASEH2A", "BRCA1", "KPNA2", "NME1", "NABP2", "GTSF1", "CDC20", "SNX10", "MAD2L1",
"YWHAH", "UBE2C", "TOP2A", "EBP", "CLIC1", "GINS2", "SLC29A1", "SRI", "ORC6", "PLK1")
# Downregulated genes
down_genes <- c("SARAF", "PCED1B-AS1", "SNHG5", "BTG1", "SRSF5", "IKZF1", "PRMT2", "RBL2", "PACS1", "MAX",
"ZBTB20", "PIK3IP1", "RAPGEF6", "IL7R", "N4BP2L2", "SF1", "RPS27", "RIPOR2", "RSRP1", "DAZAP2",
"ZFP36L1", "TXNIP", "HLA-E", "DGKA", "SON", "DDX6", "RASA3", "VAMP2", "PNRC1", "CDKN1B",
"PPP2R5C", "LEPROTL1", "EPC1", "CD247", "FOXP1", "PBXIP1", "LINC01578", "CIRBP", "ABLIM1",
"SUN2", "ZFP36", "ZFP36L2", "AL138963.4", "ETS1", "RNASET2", "HIF1A", "PNISR", "R3HDM4",
"ANKRD44", "DDX24")
# Custom theme for Nature-style plots
nature_theme <- function(base_size = 12, base_family = "sans") {
theme_minimal(base_size = base_size, base_family = base_family) +
theme(
text = element_text(color = "black"),
plot.title = element_text(hjust = 0.5, face = "bold", size = rel(1.2)),
axis.title = element_text(face = "bold", size = rel(1)),
axis.text = element_text(size = rel(0.9)),
legend.position = c(0.85, 0.85),
legend.background = element_rect(fill = "white", color = NA),
panel.grid.major = element_line(linewidth = 0.25, color = "grey90"),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line = element_line(linewidth = 0.5, color = "black")
)
}
# Enhanced volcano plot with bold text and 1 point bigger size
ggplot(de_genes_processed, aes(x = avg_log2FC, y = log10_pval)) +
geom_point(
aes(color = regulation, alpha = regulation, size = regulation),
shape = 16
) +
scale_color_manual(
values = c(
"Upregulated" = "#D32F2F", # Nature-style red
"Downregulated" = "#1976D2", # Nature-style blue
"NS" = "grey60"
),
breaks = c("Upregulated", "Downregulated")
) +
scale_alpha_manual(
values = c(
"Upregulated" = 0.8, # Slightly reduced opacity for better contrast
"Downregulated" = 0.8,
"NS" = 0.5
),
guide = "none"
) +
scale_size_manual(
values = c(
"Upregulated" = 3.5, # Slightly larger size for upregulated
"Downregulated" = 3.5,
"NS" = 2
),
guide = "none"
) +
geom_vline(
xintercept = c(-1.5, 1.5), # Adjust threshold for more specific markers
linetype = "dashed",
color = "grey40",
linewidth = 0.6
) +
geom_hline(
yintercept = -log10(0.05),
linetype = "dashed",
color = "grey40",
linewidth = 0.6
) +
geom_text_repel(
data = filter(de_genes_processed, !is.na(highlight)),
aes(label = highlight),
size = 4.5, # Increase text size by 1 point
fontface = "bold", # Make text bold
box.padding = 0.45,
point.padding = 0.3,
segment.color = "grey30",
segment.size = 0.4,
max.overlaps = 25,
min.segment.length = 0.1,
force = 2,
color = "black",
bg.color = "white",
bg.r = 0.15
) +
annotate(
"text",
x = c(-2.8, 2.8),
y = rep(max(de_genes_processed$log10_pval) * 0.95, 2),
label = c("Downregulated", "Upregulated"),
color = c("#1976D2", "#D32F2F"),
size = 5, # Increase text size by 1 point
fontface = "bold"
) +
labs(
title = "Volcano Plot of Differentially Expressed Genes",
x = expression(Log[2]~fold~change),
y = expression(-Log[10]~adjusted~italic(p)),
color = "Gene Regulation"
) +
nature_theme() +
guides(
color = guide_legend(override.aes = list(size = 4, alpha = 1))
) +
coord_cartesian(clip = "off") +
scale_x_continuous(expand = expansion(mult = 0.1)) +
scale_y_continuous(expand = expansion(mult = 0.05))

NA
NA
Heatmap-1

Heatmap-2
library(Seurat)
library(ComplexHeatmap)
library(circlize)
library(viridis)
# 1. Prepare gene lists with error checking
all_genes <- unique(c(up_genes, down_genes))
all_genes <- intersect(all_genes, rownames(GetAssayData(sample, assay = "RNA"))) # Corrected line
if(length(all_genes) == 0) stop("No genes from the lists found in the Seurat object")
# 2. Set default assay to "RNA" and scale the data
DefaultAssay(sample) <- "RNA" # Set the default assay to RNA
#sample <- ScaleData(sample) # Scale the RNA data
# 3. Extract scaled data
expr_matrix <- GetAssayData(sample, assay = "RNA", slot = "scale.data")[all_genes, ]
# 4. Create metadata annotations
gene_annotation <- data.frame(
Gene = all_genes,
Regulation = ifelse(all_genes %in% up_genes, "Up", "Down"),
row.names = all_genes
)
# Order by clusters
cluster_order <- order(sample$seurat_clusters)
expr_matrix <- expr_matrix[, cluster_order]
# 5. Create annotations
column_ha <- HeatmapAnnotation(
Cluster = sample$seurat_clusters[cluster_order],
col = list(Cluster = setNames(
viridis(length(unique(sample$seurat_clusters))),
unique(sample$seurat_clusters)
)),
annotation_name_side = "left",
show_legend = TRUE
)
row_ha <- rowAnnotation(
Regulation = gene_annotation[all_genes, "Regulation"],
col = list(Regulation = c("Up" = "#D32F2F", "Down" = "#1976D2")),
show_annotation_name = FALSE
)
# 6. Create heatmap with additional improvements
ht <- Heatmap(expr_matrix,
name = "Z-score",
col = colorRamp2(c(-2, 0, 2), c("#1E88E5", "white", "#D81B60")), # Adjust color scheme for better contrast
cluster_rows = TRUE, # Cluster rows (genes)
cluster_columns = TRUE, # Cluster columns (samples)
show_row_names = TRUE,
show_column_names = FALSE,
row_names_gp = gpar(fontsize = 8), # Smaller row name font
column_title_gp = gpar(fontsize = 12, fontface = "bold"), # Larger column title font
top_annotation = column_ha,
left_annotation = row_ha,
heatmap_legend_param = list(
title_gp = gpar(fontsize = 10, fontface = "bold"),
labels_gp = gpar(fontsize = 8),
direction = "horizontal" # Place legend horizontally for more room
),
row_title = "Genes", # Add a row title
column_split = sample$seurat_clusters, # Split columns by clusters
row_dend_width = unit(4, "cm"),
column_dend_height = unit(4, "cm"), # Adjust column dendrogram height
show_row_dend = TRUE, # Show row dendrogram
show_column_dend = TRUE, # Show column dendrogram
border = TRUE, # Add borders around the cells
row_names_side = "left", # Place row names on the left
column_names_gp = gpar(fontsize = 8, fontface = "italic"), # Italicize column names
use_raster = TRUE # Use rasterization for better performance
)
# 7. Display in notebook with improved padding and legend side
draw(ht, padding = unit(c(4, 4, 4, 4), "mm"),
heatmap_legend_side = "right",
annotation_legend_side = "right")
# 8. Save outputs
# PDF for publication
pdf("publication_heatmap_improved2.pdf", width = 8.7/2.54, height = 11/2.54) # Convert cm to inches
draw(ht)
dev.off()
png
2
# PNG for preview
png("heatmap_preview_improved2.png", width = 2500, height = 3000, res = 300)
draw(ht)
dev.off()
png
2

---
title: "HSPC vs Control_VolcanoPlots"
author: Nasir Mahmood Abbasi
date: "`r Sys.Date()`"
output:
  # pdf_document: default
  # word_document: default
  # html_document: default
  #rmdformats::readthedown
  html_notebook:
    toc: true
    toc_float: true
    toc_collapsed: true
---

# 1. load libraries
```{r setup, include=FALSE}
library(SCpubr)
library(readr)
library(Seurat)
library(SCpubr)

# Load necessary libraries
library(clusterProfiler)
library(org.Hs.eg.db)
library(enrichplot)
library(ReactomePA)
library(DOSE) # For GSEA analysis
library(ggplot2) # Ensure ggplot2 is available for plotting
library(dplyr)

```

# 2. Load the filtered list on mean expression
```{r , fig.height=8, fig.width=10}

load("../../To_Transfer_between_computers/23-Harmony_Integration/0-robj/5-Harmony_Integrated_All_samples_Merged_CD4Tcells_final_Resolution_Selected_0.8_ADT_Normalized_cleaned_mt.robj")

sample <- All_samples_Merged

rm(All_samples_Merged)

gc()

# Read the CSV file from LIBRA pseudobulk DE
de_genes <- read_csv("../../To_Transfer_between_computers/1-FINAL_Pseudobul_Analysis/2-Malignant_vs_Control_Pseudobulk_Enrichment_Analysis/2-Pseudobulk_Deseq2_LRT_filtered_on_mean.csv")  # Use read.csv() if needed

# Step 2: Rename columns to match SCpubr expectations
de_genes <- de_genes %>%
  rename(
    avg_log2FC = avg_logFC,   # This is crucial
    p_val_adj = p_val_adj,    # Already correct
    gene = gene               # Already correct
  )
de_genes_unique <- de_genes %>%
  filter(abs(avg_log2FC) > 0) %>%  # Filter out genes with fold change exactly 0 (if any)
  group_by(gene) %>%
  slice_min(order_by = p_val_adj, n = 1, with_ties = FALSE) %>%  # Keep only the best result per gene
  ungroup()

p <- SCpubr::do_VolcanoPlot(
  sample = sample,
  de_genes = de_genes
)


# Display the plot
print(p)

```

# 3. Volcano Plot 1
```{r , fig.height=8, fig.width=10}
library(ggplot2)
library(dplyr)
library(ggrepel)

# Filter significant genes
de_genes_sig <- de_genes %>%
  filter(!is.na(p_val_adj)) %>%
  mutate(direction = case_when(
    avg_log2FC >= 1 & p_val_adj < 0.05 ~ "Upregulated",
    avg_log2FC <= -1 & p_val_adj < 0.05 ~ "Downregulated",
    TRUE ~ "NS"
  ))

# Select top 50 up and 50 downregulated genes
top_genes <- de_genes_sig %>%
  filter(direction %in% c("Upregulated", "Downregulated")) %>%
  group_by(direction) %>%
  slice_min(order_by = p_val_adj, n = 50) %>%
  ungroup()

# Volcano plot
volcano <- ggplot(de_genes_sig, aes(x = avg_log2FC, y = -log10(p_val_adj))) +
  geom_point(aes(color = direction), alpha = 0.7, size = 2.5) +  # Increased dot size here
  scale_color_manual(values = c("Upregulated" = "#D62728", "Downregulated" = "#1F77B4", "NS" = "lightgrey")) +
  geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "grey40") +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "grey40") +
  geom_text_repel(
    data = top_genes,
    aes(label = gene),
    size = 4,
    max.overlaps = Inf,  # No limit on overlaps
    box.padding = 0.5,
    segment.size = 0.3
  ) +
  labs(
    title = "Volcano Plot: Malignant vs Normal CD4+ T cells",
    x = "log2 Fold Change",
    y = "-log10 Adjusted p-value",
    color = "Regulation"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    legend.position = "right",
    plot.title = element_text(hjust = 0.5)
  )

# Display or save
print(volcano)
# ggsave("volcano_plot.pdf", plot = volcano, width = 8, height = 6)


```


## Volcano Plot 2
```{r , fig.height=8, fig.width=10}
library(ggplot2)
library(dplyr)
library(ggrepel)

# ---- Step 1: Prepare Data ----
de_genes <- de_genes %>%
  mutate(
    log10_pval = -log10(p_val_adj),
    regulation = case_when(
      avg_log2FC >= 1.5 & p_val_adj < 0.05 ~ "Upregulated",
      avg_log2FC <= -1 & p_val_adj < 0.05 ~ "Downregulated",
      TRUE ~ "NS"
    )
  )

# ---- Step 2: Top 50 Up and 50 Downregulated Genes ----
top_genes <- de_genes %>%
  filter(regulation %in% c("Upregulated", "Downregulated")) %>%
  group_by(regulation) %>%
  slice_min(order_by = p_val_adj, n = 50) %>%
  ungroup()


# ---- Step 3: Plot ----
p3 <- ggplot(de_genes, aes(x = avg_log2FC, y = log10_pval)) +
  geom_point(aes(color = regulation), size = 3, alpha = 0.7) +
  scale_color_manual(values = c("Upregulated" = "#D62728", "Downregulated" = "#1F77B4", "NS" = "lightgrey")) +
  geom_vline(xintercept = c(-1, 1.5), linetype = "dashed", color = "grey50") +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "grey50") +
  geom_label_repel(
    data = top_genes,
    mapping = aes(label = gene),
    size = 3.5,              # text size
    max.overlaps = 100,
    box.padding = 0.3,
    segment.size = 0.3,
    min.segment.length = 0
  ) +
  labs(
    title = "Volcano Plot: Malignant vs Normal CD4+ T cells",
    x = "log2 Fold Change",
    y = "-log10 Adjusted p-value",
    color = "Regulation"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "right"
  )

# ---- Step 4: Display Plot ----
print(p3)

```

## Volcano Plot 3
```{r , fig.height=8, fig.width=10}
# Step 1: Store Your Gene Lists in R

# Upregulated genes
up_genes <- c("MYL6B", "CENPF", "CDT1", "CRNDE", "CHEK1", "TPX2", "UBE2T", "CDCA8", "CDKN3", "PCLAF", 
              "ASF1B", "MCM2", "MYBL2", "PKMYT1", "SLC25A5", "NUSAP1", "CENPW", "AHCY", "TTF2", "COX5A", 
              "FH", "CKS1B", "UCK2", "CKS2", "CCNB1", "PYCR1", "MAPKAPK3", "NCAPD2", "TRIP13", "RBBP8", 
              "TUBA1C", "RNASEH2A", "BRCA1", "KPNA2", "NME1", "NABP2", "GTSF1", "CDC20", "SNX10", "MAD2L1", 
              "YWHAH", "UBE2C", "TOP2A", "EBP", "CLIC1", "GINS2", "SLC29A1", "SRI", "ORC6", "PLK1")

# Downregulated genes
down_genes <- c("SARAF", "PCED1B-AS1", "SNHG5", "BTG1", "SRSF5", "IKZF1", "PRMT2", "RBL2", "PACS1", "MAX",
                "ZBTB20", "PIK3IP1", "RAPGEF6", "IL7R", "N4BP2L2", "SF1", "RPS27", "RIPOR2", "RSRP1", "DAZAP2",
                "ZFP36L1", "TXNIP", "HLA-E", "DGKA", "SON", "DDX6", "RASA3", "VAMP2", "PNRC1", "CDKN1B",
                "PPP2R5C", "LEPROTL1", "EPC1", "CD247", "FOXP1", "PBXIP1", "LINC01578", "CIRBP", "ABLIM1", 
                "SUN2", "ZFP36", "ZFP36L2", "AL138963.4", "ETS1", "RNASET2", "HIF1A", "PNISR", "R3HDM4", 
                "ANKRD44", "DDX24")

#Step 2: Process DE Data and Add Regulation Category
highlight_genes <- c(up_genes, down_genes)

library(dplyr)

de_genes_processed <- de_genes %>%
  filter(!is.na(p_val_adj)) %>%
  mutate(
    log10_pval = -log10(p_val_adj),
    regulation = case_when(
      avg_log2FC >= 1.5 & p_val_adj < 0.05 ~ "Upregulated",
      avg_log2FC <= -1 & p_val_adj < 0.05 ~ "Downregulated",
      TRUE ~ "NS"
    ),
    highlight = ifelse(gene %in% highlight_genes, gene, NA)
  )

# Step 3: Make Volcano Plot with Highlighted Genes

library(ggplot2)
library(ggrepel)

ggplot(de_genes_processed, aes(x = avg_log2FC, y = log10_pval)) +
  geom_point(aes(color = regulation), alpha = 0.7, size = 2.8) +
  scale_color_manual(values = c("Upregulated" = "#D62728", "Downregulated" = "#1F77B4", "NS" = "lightgrey")) +
  geom_vline(xintercept = c(-1, 1.5), linetype = "dashed", color = "gray50") +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "gray50") +
  geom_label_repel(
    data = filter(de_genes_processed, !is.na(highlight)),
    aes(label = highlight),
    size = 3.2,
    box.padding = 0.3,
    max.overlaps = 100,
    segment.size = 0.3
  ) +
  labs(
    title = "Volcano Plot Highlighting Selected Biomarkers",
    x = "log2 Fold Change",
    y = "-log10 Adjusted p-value",
    color = "Regulation"
  ) +
  theme_minimal(base_size = 14) +
  theme(plot.title = element_text(hjust = 0.5))

```




## Volcano Plot 4
```{r , fig.height=12, fig.width=16}
# Step 1: Store Your Gene Lists in R

# Upregulated genes
up_genes <- c("MYL6B", "CENPF", "CDT1", "CRNDE", "CHEK1", "TPX2", "UBE2T", "CDCA8", "CDKN3", "PCLAF", 
              "ASF1B", "MCM2", "MYBL2", "PKMYT1", "SLC25A5", "NUSAP1", "CENPW", "AHCY", "TTF2", "COX5A", 
              "FH", "CKS1B", "UCK2", "CKS2", "CCNB1", "PYCR1", "MAPKAPK3", "NCAPD2", "TRIP13", "RBBP8", 
              "TUBA1C", "RNASEH2A", "BRCA1", "KPNA2", "NME1", "NABP2", "GTSF1", "CDC20", "SNX10", "MAD2L1", 
              "YWHAH", "UBE2C", "TOP2A", "EBP", "CLIC1", "GINS2", "SLC29A1", "SRI", "ORC6", "PLK1")

# Downregulated genes
down_genes <- c("SARAF", "PCED1B-AS1", "SNHG5", "BTG1", "SRSF5", "IKZF1", "PRMT2", "RBL2", "PACS1", "MAX",
                "ZBTB20", "PIK3IP1", "RAPGEF6", "IL7R", "N4BP2L2", "SF1", "RPS27", "RIPOR2", "RSRP1", "DAZAP2",
                "ZFP36L1", "TXNIP", "HLA-E", "DGKA", "SON", "DDX6", "RASA3", "VAMP2", "PNRC1", "CDKN1B",
                "PPP2R5C", "LEPROTL1", "EPC1", "CD247", "FOXP1", "PBXIP1", "LINC01578", "CIRBP", "ABLIM1", 
                "SUN2", "ZFP36", "ZFP36L2", "AL138963.4", "ETS1", "RNASET2", "HIF1A", "PNISR", "R3HDM4", 
                "ANKRD44", "DDX24")

# Custom theme for Nature-style plots
nature_theme <- function(base_size = 12, base_family = "sans") {
  theme_minimal(base_size = base_size, base_family = base_family) +
    theme(
      text = element_text(color = "black"),
      plot.title = element_text(hjust = 0.5, face = "bold", size = rel(1.2)),
      axis.title = element_text(face = "bold", size = rel(1)),
      axis.text = element_text(size = rel(0.9)),
      legend.position = c(0.85, 0.85),
      legend.background = element_rect(fill = "white", color = NA),
      panel.grid.major = element_line(linewidth = 0.25, color = "grey90"),
      panel.grid.minor = element_blank(),
      panel.border = element_blank(),
      axis.line = element_line(linewidth = 0.5, color = "black")
    )
}

# Enhanced volcano plot with bold text and 1 point bigger size
ggplot(de_genes_processed, aes(x = avg_log2FC, y = log10_pval)) +
  geom_point(
    aes(color = regulation, alpha = regulation, size = regulation),
    shape = 16
  ) +
  scale_color_manual(
    values = c(
      "Upregulated" = "#D32F2F",  # Nature-style red
      "Downregulated" = "#1976D2",  # Nature-style blue
      "NS" = "grey60"
    ),
    breaks = c("Upregulated", "Downregulated")
  ) +
  scale_alpha_manual(
    values = c(
      "Upregulated" = 0.8,  # Slightly reduced opacity for better contrast
      "Downregulated" = 0.8,
      "NS" = 0.5
    ),
    guide = "none"
  ) +
  scale_size_manual(
    values = c(
      "Upregulated" = 3.5,  # Slightly larger size for upregulated
      "Downregulated" = 3.5,
      "NS" = 2
    ),
    guide = "none"
  ) +
  geom_vline(
    xintercept = c(-1.5, 1.5),  # Adjust threshold for more specific markers
    linetype = "dashed",
    color = "grey40",
    linewidth = 0.6
  ) +
  geom_hline(
    yintercept = -log10(0.05),
    linetype = "dashed",
    color = "grey40",
    linewidth = 0.6
  ) +
  geom_text_repel(
    data = filter(de_genes_processed, !is.na(highlight)),
    aes(label = highlight),
    size = 4.5,  # Increase text size by 1 point
    fontface = "bold",  # Make text bold
    box.padding = 0.45,
    point.padding = 0.3,
    segment.color = "grey30",
    segment.size = 0.4,
    max.overlaps = 25,
    min.segment.length = 0.1,
    force = 2,
    color = "black",
    bg.color = "white",
    bg.r = 0.15
  ) +
  annotate(
    "text",
    x = c(-2.8, 2.8),
    y = rep(max(de_genes_processed$log10_pval) * 0.95, 2),
    label = c("Downregulated", "Upregulated"),
    color = c("#1976D2", "#D32F2F"),
    size = 5,  # Increase text size by 1 point
    fontface = "bold"
  ) +
  labs(
    title = "Volcano Plot of Differentially Expressed Genes",
    x = expression(Log[2]~fold~change),
    y = expression(-Log[10]~adjusted~italic(p)),
    color = "Gene Regulation"
  ) +
  nature_theme() +
  guides(
    color = guide_legend(override.aes = list(size = 4, alpha = 1))
  ) +
  coord_cartesian(clip = "off") +
  scale_x_continuous(expand = expansion(mult = 0.1)) +
  scale_y_continuous(expand = expansion(mult = 0.05))


```

## Heatmap-1
```{r , fig.height=22, fig.width=36}

library(Seurat)
library(ComplexHeatmap)
library(circlize)
library(viridis)

# 1. Prepare gene lists with error checking
all_genes <- unique(c(up_genes, down_genes))
all_genes <- intersect(all_genes, rownames(GetAssayData(sample, assay = "RNA"))) # Corrected line

if(length(all_genes) == 0) stop("No genes from the lists found in the Seurat object")

# 2. Set default assay to "RNA" and scale the data
DefaultAssay(sample) <- "RNA"  # Set the default assay to RNA
sample <- ScaleData(sample)  # Scale the RNA data

# 3. Extract scaled data
expr_matrix <- GetAssayData(sample, assay = "RNA", slot = "scale.data")[all_genes, ]

# 4. Create metadata annotations
gene_annotation <- data.frame(
  Gene = all_genes,
  Regulation = ifelse(all_genes %in% up_genes, "Up", "Down"),
  row.names = all_genes
)

# Order by clusters
cluster_order <- order(sample$seurat_clusters)
expr_matrix <- expr_matrix[, cluster_order]

# 5. Create annotations
column_ha <- HeatmapAnnotation(
  Cluster = sample$seurat_clusters[cluster_order],
  col = list(Cluster = setNames(
    viridis(length(unique(sample$seurat_clusters))),
    unique(sample$seurat_clusters)
  )),
  annotation_name_side = "left",
  show_legend = TRUE
)

row_ha <- rowAnnotation(
  Regulation = gene_annotation[all_genes, "Regulation"],
  col = list(Regulation = c("Up" = "#D32F2F", "Down" = "#1976D2")),
  show_annotation_name = FALSE
)

# 6. Create heatmap
ht <- Heatmap(expr_matrix,
  name = "Z-score",
  col = colorRamp2(c(-2, 0, 2), c("#1E88E5", "white", "#D81B60")),
  cluster_columns = FALSE,
  cluster_rows = TRUE,
  show_row_names = TRUE,
  show_column_names = FALSE,
  row_names_gp = gpar(fontsize = 8),
  column_title_gp = gpar(fontsize = 10),
  top_annotation = column_ha,
  left_annotation = row_ha,
  heatmap_legend_param = list(
    title_gp = gpar(fontsize = 10),
    labels_gp = gpar(fontsize = 8)
  ),
  row_title = NULL,
  column_split = sample$seurat_clusters, # Corrected this line
  row_dend_width = unit(4, "cm"),
  use_raster = TRUE
)

# 7. Display in notebook
draw(ht, padding = unit(c(2, 2, 2, 2), "mm"),
     heatmap_legend_side = "right",
     annotation_legend_side = "right")

# 8. Save outputs
# PDF for publication
pdf("publication_heatmap.pdf", width = 8.7/2.54, height = 11/2.54) # Convert cm to inches
draw(ht)
dev.off()

# PNG for preview
png("heatmap_preview.png", width = 5000, height = 2500, res = 300)
draw(ht)
dev.off()



```


## Heatmap-2
```{r , fig.height=22, fig.width=36}

library(Seurat)
library(ComplexHeatmap)
library(circlize)
library(viridis)

# 1. Prepare gene lists with error checking
all_genes <- unique(c(up_genes, down_genes))
all_genes <- intersect(all_genes, rownames(GetAssayData(sample, assay = "RNA")))  # Corrected line

if(length(all_genes) == 0) stop("No genes from the lists found in the Seurat object")

# 2. Set default assay to "RNA" and scale the data
DefaultAssay(sample) <- "RNA"  # Set the default assay to RNA
#sample <- ScaleData(sample)  # Scale the RNA data

# 3. Extract scaled data
expr_matrix <- GetAssayData(sample, assay = "RNA", slot = "scale.data")[all_genes, ]

# 4. Create metadata annotations
gene_annotation <- data.frame(
  Gene = all_genes,
  Regulation = ifelse(all_genes %in% up_genes, "Up", "Down"),
  row.names = all_genes
)

# Order by clusters
cluster_order <- order(sample$seurat_clusters)
expr_matrix <- expr_matrix[, cluster_order]

# 5. Create annotations
column_ha <- HeatmapAnnotation(
  Cluster = sample$seurat_clusters[cluster_order],
  col = list(Cluster = setNames(
    viridis(length(unique(sample$seurat_clusters))),
    unique(sample$seurat_clusters)
  )),
  annotation_name_side = "left",
  show_legend = TRUE
)

row_ha <- rowAnnotation(
  Regulation = gene_annotation[all_genes, "Regulation"],
  col = list(Regulation = c("Up" = "#D32F2F", "Down" = "#1976D2")),
  show_annotation_name = FALSE
)

# 6. Create heatmap with additional improvements
ht <- Heatmap(expr_matrix,
  name = "Z-score",
  col = colorRamp2(c(-2, 0, 2), c("#1E88E5", "white", "#D81B60")),  # Adjust color scheme for better contrast
  cluster_rows = TRUE,  # Cluster rows (genes)
  cluster_columns = TRUE,  # Cluster columns (samples)
  show_row_names = TRUE,
  show_column_names = FALSE,
  row_names_gp = gpar(fontsize = 8),  # Smaller row name font
  column_title_gp = gpar(fontsize = 12, fontface = "bold"),  # Larger column title font
  top_annotation = column_ha,
  left_annotation = row_ha,
  heatmap_legend_param = list(
    title_gp = gpar(fontsize = 10, fontface = "bold"),
    labels_gp = gpar(fontsize = 8),
    direction = "horizontal"  # Place legend horizontally for more room
  ),
  row_title = "Genes",  # Add a row title
  column_split = sample$seurat_clusters,  # Split columns by clusters
  row_dend_width = unit(4, "cm"),
  column_dend_height = unit(4, "cm"),  # Adjust column dendrogram height
  show_row_dend = TRUE,  # Show row dendrogram
  show_column_dend = TRUE,  # Show column dendrogram
  border = TRUE,  # Add borders around the cells
  row_names_side = "left",  # Place row names on the left
  column_names_gp = gpar(fontsize = 8, fontface = "italic"),  # Italicize column names
  use_raster = TRUE  # Use rasterization for better performance
)

# 7. Display in notebook with improved padding and legend side
draw(ht, padding = unit(c(4, 4, 4, 4), "mm"),
     heatmap_legend_side = "right",
     annotation_legend_side = "right")

# 8. Save outputs
# PDF for publication
pdf("publication_heatmap_improved2.pdf", width = 8.7/2.54, height = 11/2.54)  # Convert cm to inches
draw(ht)
dev.off()

# PNG for preview
png("heatmap_preview_improved2.png", width = 2500, height = 3000, res = 300)
draw(ht)
dev.off()


```
