1. load libraries

2. load seurat object

#Load Seurat Object
load("/home/nabbasi/isilon/To_Transfer_between_computers/23-Harmony_Integration/0-robj/5-Harmony_Integrated_All_samples_Merged_CD4Tcells_final_Resolution_Selected_0.8_ADT_Normalized_cleaned_mt.robj")



Layers(All_samples_Merged@assays$RNA)
[1] "data"   "counts"

3. clusters vs the rest

markers_genes %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1) %>%
    slice_head(n = 5) %>%
    ungroup() -> top5
DoHeatmap(All_samples_Merged, features = top5$gene) + NoLegend()
Avis : Layer ‘scale.data’ is emptyErreur dans DoHeatmap(All_samples_Merged, features = top5$gene) : 
  No requested features found in the scale.data slot for the RNA assay.

4. top 25 overexpressed genes for plotting.

# Same plotting commands
P1 <- par(mfrow = c(2, 5), mar = c(4, 6, 3, 1))
for (i in unique(top25$cluster)) {
    barplot(sort(setNames(top25$avg_log2FC, top25$gene)[top25$cluster == i], F),
        horiz = T, las = 1, main = paste0(i, " vs. rest"), border = "white", yaxs = "i"
    )
    abline(v = c(0, 0.25), lty = c(1, 2))
}



P1
$mfrow
[1] 1 1

$mar
[1] 5.1 4.1 2.1 2.1

5. top 5 overexpressed genes for plotting heatmap

#alldata <- All_samples_Merged

# create a scale.data slot for the selected genes
alldata <- ScaleData(alldata, features = as.character(unique(top5$gene)), assay = "RNA")
Centering and scaling data matrix

  |                                                                                        
  |                                                                                  |   0%
  |                                                                                        
  |==================================================================================| 100%
heatmap_plot <- DoHeatmap(alldata, features = as.character(unique(top5$gene)), group.by = "seurat_clusters", assay = "RNA", size = 2.5)

# Save the heatmap as a PNG image (with 300 dpi for high-quality publication)
ggsave("top5_marker_genes_heatmap.png", plot = heatmap_plot, width = 12, height = 8, dpi = 300)

heatmap_plot

6. top 5 overexpressed genes for plotting dotplot

#alldata <- All_samples_Merged
  
DotPlot(alldata, features = rev(as.character(unique(top5$gene))), group.by = "seurat_clusters", assay = "RNA") + coord_flip()

7. scPubr Group-Wise DE analysis Plot


# library(SCpubr)
# 
# p1 <- SCpubr::do_GroupwiseDEPlot(sample = alldata,
#                                 de_genes = markers_genes,
#                                 min.cutoff = 1)
# 
# p1
# 
# p2 <- SCpubr::do_GroupwiseDEPlot(sample = alldata,
#                                 de_genes = markers_genes,
#                                 top_genes = 5)
# 
# p2
# 
# p3 <- SCpubr::do_GroupwiseDEPlot(sample = alldata,
#                                 de_genes = markers_genes,
#                                 group.by = c("seurat_clusters", 
#                                              "cell_line", 
#                                              "Patient_origin"),
#                                 row_title_expression = c("",
#                                                          "cell_line",
#                                                          "Donor"))
#       
# 
# p3

8. scPubr Group-Wise DE analysis Plot


library(scCustomize)
scCustomize v3.0.1
If you find the scCustomize useful please cite.
See 'samuel-marsh.github.io/scCustomize/articles/FAQ.html' for citation info.
top5 <- top5[!duplicated(top5$gene), ]

p4 <- DotPlot_scCustom(seurat_object = All_samples_Merged, features = top5$gene, x_lab_rotate = TRUE)

p4

p5 <- DotPlot_scCustom(seurat_object = All_samples_Merged, features = top5$gene, flip_axes = T,
    remove_axis_titles = FALSE)

p5

p6 <- Clustered_DotPlot(seurat_object = All_samples_Merged, features = top5$gene, k = 8)
                                

p6
[[1]]

[[2]]

7. scPubr Group-Wise DE analysis Plot

library(SCpubr)

# Create a named list of genes for each cluster
genes <- split(top5$gene, top5$cluster)

# Generate the dot plot
p <- SCpubr::do_DotPlot(
    sample = All_samples_Merged,  # Replace with your Seurat object
    features = genes,flip = TRUE
)
Erreur : Please provide the genes as a simple character vector or set flip to FALSE.

top 5 overexpressed genes for plotting heatmap


#alldata <- All_samples_Merged

library(dplyr)

# Select the top 5 genes per cluster, ensuring unique genes across clusters
top5_U <- markers_genes %>%
    arrange(p_val_adj, desc(avg_log2FC)) %>%  # Sort by p_val_adj (ascending) and avg_log2FC (descending)
    distinct(gene, .keep_all = TRUE) %>%      # Remove duplicate genes across clusters
    group_by(cluster) %>%
    top_n(-5, p_val_adj) %>%                  # Select the top 5 genes by p_val_adj
    top_n(5, avg_log2FC) %>%                  # Further select the top 5 genes by avg_log2FC
    ungroup()

# Order genes by cluster
ordered_genes <- top5_U %>%
    arrange(cluster) %>%  # Sort by cluster
    pull(gene)            # Extract the ordered gene names as a vector

# Scale data for the selected genes
alldata <- ScaleData(alldata, features = as.character(ordered_genes), assay = "RNA")

# Create the heatmap with ordered genes
heatmap_plot <- DoHeatmap(
    alldata,
    features = as.character(ordered_genes),  # Use the ordered genes
    group.by = "seurat_clusters",
    assay = "RNA",
    size = 2.5
)

# Save the heatmap as a PNG image (with 300 dpi for high-quality publication)
ggsave("top5_U_marker_genes_heatmap.png", plot = heatmap_plot, width = 12, height = 8, dpi = 300)

top 5 overexpressed genes for plotting dotplot

# alldata <- All_samples_Merged
#   
# DotPlot(alldata, features = rev(as.character(unique(top5_U$gene))), group.by = "seurat_clusters", assay = "RNA") + coord_flip()
# 
# # Save marker genes to a CSV file
# #write.csv(top25, file = "Top25_markers_genes_RNA_Wilcox.csv", row.names = TRUE)
# 
# 
# # Save marker genes to a CSV file
# write.csv(top5_U, file = "top5_U_markers_genes_RNA_Wilcox.csv", row.names = TRUE)

library(dplyr)

# Order genes by cluster
ordered_genes <- top5_U %>%
    arrange(cluster) %>%  # Sort by cluster
    pull(gene)            # Extract the ordered gene names as a vector

# Create the DotPlot with ordered genes
dotplot <- DotPlot(
    alldata,
    features = rev(as.character(ordered_genes)),  # Reverse the order for coord_flip()
    group.by = "seurat_clusters",
    assay = "RNA"
) + coord_flip()

# Display the DotPlot
print(dotplot)

# Save the DotPlot as a high-resolution image
ggsave("top5_U_marker_genes_dotplot.png", plot = dotplot, width = 12, height = 8, dpi = 300)

# Save the ordered marker genes to a CSV file
write.csv(top5_U %>% arrange(cluster), file = "top5_U_markers_genes_RNA_Wilcox.csv", row.names = TRUE)

scPubr Group-Wise DE analysis Plot


library(scCustomize)

top5_U <- top5_U[!duplicated(top5_U$gene), ]

p4 <- DotPlot_scCustom(seurat_object = All_samples_Merged, features = ordered_genes, x_lab_rotate = TRUE)

p4

p5 <- DotPlot_scCustom(seurat_object = All_samples_Merged, features = ordered_genes, flip_axes = T,
    remove_axis_titles = FALSE)

p5

p6 <- Clustered_DotPlot(seurat_object = All_samples_Merged, features = ordered_genes, k = 8)
                                

p6

scPubr Group-Wise DE analysis Plot


library(SCpubr)

# Create a named list of genes for each cluster
#genes <- split(top5_U$gene, top5_U$cluster)

# Generate the dot plot
p <- SCpubr::do_DotPlot(
    sample = All_samples_Merged,  # Replace with your Seurat object
    features = ordered_genes
)

# Print the plot
print(p)

ggsave("top5_U_Genes_Per_Cluster_DotPlot.png", plot = p, width = 12, height = 8, dpi = 300)
---
title: "Wilcox Analysis RNA Top Markers"
author: "Nasir Mahmood Abbasi"
date: "`r Sys.Date()`"
output:
  html_notebook:
    toc: yes
    toc_float: yes
    toc_collapsed: yes
  word_document:
    toc: yes
  html_document:
    toc: yes
    df_print: paged
  pdf_document:
    toc: yes
---

# 1. load libraries
```{r setup, include=FALSE}

suppressPackageStartupMessages({
    library(Seurat)
    library(dplyr)
    library(patchwork)
    library(ggplot2)
    library(pheatmap)
    library(enrichR)
    library(Matrix)
    library(edgeR)
})

```


# 2. load seurat object
```{r load_seurat}
#Load Seurat Object
load("/home/nabbasi/isilon/To_Transfer_between_computers/23-Harmony_Integration/0-robj/5-Harmony_Integrated_All_samples_Merged_CD4Tcells_final_Resolution_Selected_0.8_ADT_Normalized_cleaned_mt.robj")



Layers(All_samples_Merged@assays$RNA)





```

# 3. clusters vs the rest
```{r , fig.height=14, fig.width=18}

DefaultAssay(All_samples_Merged) <- "RNA"

# Normalize the RNA assay
All_samples_Merged <- NormalizeData(All_samples_Merged, assay = "RNA", normalization.method = "LogNormalize", scale.factor = 10000)


# Compute differential expression
markers_genes <- FindAllMarkers(
    All_samples_Merged,
    test.use = "wilcox",
    logfc.threshold = 0.1,
    min.pct = 0.1,
    return.thresh = 1e-5,
    only.pos = TRUE,
    assay = "RNA"
)


# Optional: add a specificity score (for sorting later)
markers_all <- markers_genes %>%
  mutate(specificity_score = avg_log2FC * (pct.1 - pct.2))

# Get top 5 markers per cluster using specificity or avg_log2FC
top5 <- markers_all %>%
  group_by(cluster) %>%
  slice_max(order_by = specificity_score, n = 5, with_ties = FALSE)


# Get top 10 markers per cluster using specificity or avg_log2FC
top10 <- markers_all %>%
  group_by(cluster) %>%
  slice_max(order_by = specificity_score, n = 10, with_ties = FALSE)

# Get top 25 markers per cluster using specificity or avg_log2FC
top25 <- markers_all %>%
  group_by(cluster) %>%
  slice_max(order_by = specificity_score, n = 25, with_ties = FALSE)


# Save
write.csv(top5, "Top5_markers_per_cluster.csv", row.names = FALSE)
write.csv(top10, "Top10_markers_per_cluster.csv", row.names = FALSE)
write.csv(top25, "Top25_markers_per_cluster.csv", row.names = FALSE)
write.csv(markers_genes, "All_marker_genes_RNA_Wilcox.csv", row.names = FALSE)

# Save marker genes to a CSV file
write.csv(markers_genes, file = "Top_markers_genes_RNA_Wilcox_with_rownames.csv", row.names = TRUE)

```



# 4. top 25 overexpressed genes for plotting.
```{r , fig.height=14, fig.width=18}


# Save to PDF with high resolution
pdf("Top25_Markers_Barplots.pdf", width = 12, height = 8)

# Set up multi-panel layout and margins
par(mfrow = c(2, 5), mar = c(4, 6, 3, 1))

# Loop over clusters
for (i in unique(top25$cluster)) {
    barplot(sort(setNames(top25$avg_log2FC, top25$gene)[top25$cluster == i], F),
        horiz = T, las = 1, main = paste0(i, " vs. rest"), border = "white", yaxs = "i"
    )
    abline(v = c(0, 0.25), lty = c(1, 2))
}
# Close the PDF device
dev.off()


# Same plotting commands
P1 <- par(mfrow = c(2, 5), mar = c(4, 6, 3, 1))
for (i in unique(top25$cluster)) {
    barplot(sort(setNames(top25$avg_log2FC, top25$gene)[top25$cluster == i], F),
        horiz = T, las = 1, main = paste0(i, " vs. rest"), border = "white", yaxs = "i"
    )
    abline(v = c(0, 0.25), lty = c(1, 2))
}


P1


```



# 5. top 5 overexpressed genes for plotting heatmap
```{r , fig.height=14, fig.width=18}
#alldata <- All_samples_Merged

# create a scale.data slot for the selected genes
alldata <- ScaleData(alldata, features = as.character(unique(top5$gene)), assay = "RNA")
heatmap_plot <- DoHeatmap(alldata, features = as.character(unique(top5$gene)), group.by = "seurat_clusters", assay = "RNA", size = 2.5)

# Save the heatmap as a PNG image (with 300 dpi for high-quality publication)
ggsave("top5_marker_genes_heatmap.png", plot = heatmap_plot, width = 12, height = 8, dpi = 300)

heatmap_plot
```


# 6. top 5 overexpressed genes for plotting dotplot
```{r , fig.height=14, fig.width=18}
#alldata <- All_samples_Merged
  
DotPlot(alldata, features = rev(as.character(unique(top5$gene))), group.by = "seurat_clusters", assay = "RNA") + coord_flip()

```

# 7. scPubr Group-Wise DE analysis Plot
```{r , fig.height=14, fig.width=18}

# library(SCpubr)
# 
# p1 <- SCpubr::do_GroupwiseDEPlot(sample = alldata,
#                                 de_genes = markers_genes,
#                                 min.cutoff = 1)
# 
# p1
# 
# p2 <- SCpubr::do_GroupwiseDEPlot(sample = alldata,
#                                 de_genes = markers_genes,
#                                 top_genes = 5)
# 
# p2
# 
# p3 <- SCpubr::do_GroupwiseDEPlot(sample = alldata,
#                                 de_genes = markers_genes,
#                                 group.by = c("seurat_clusters", 
#                                              "cell_line", 
#                                              "Patient_origin"),
#                                 row_title_expression = c("",
#                                                          "cell_line",
#                                                          "Donor"))
#       
# 
# p3

```


# 8. scPubr Group-Wise DE analysis Plot
```{r , fig.height=14, fig.width=18}

library(scCustomize)

top5 <- top5[!duplicated(top5$gene), ]

p4 <- DotPlot_scCustom(seurat_object = All_samples_Merged, features = top5$gene, x_lab_rotate = TRUE)

p4

p5 <- DotPlot_scCustom(seurat_object = All_samples_Merged, features = top5$gene, flip_axes = T,
    remove_axis_titles = FALSE)

p5

p6 <- Clustered_DotPlot(seurat_object = All_samples_Merged, features = top5$gene, k = 8)
                                

p6

```
# 7. scPubr Group-Wise DE analysis Plot
```{r , fig.height=14, fig.width=18}

library(SCpubr)

# Create a named list of genes for each cluster
genes <- split(top5$gene, top5$cluster)

# Generate the dot plot
p <- SCpubr::do_DotPlot(
    sample = All_samples_Merged,  # Replace with your Seurat object
    features = genes
)

# Print the plot
print(p)

ggsave("Top5_Genes_Per_Cluster_DotPlot.png", plot = p, width = 12, height = 8, dpi = 300)





```
## top 5 overexpressed genes for plotting heatmap
```{r , fig.height=14, fig.width=18}

#alldata <- All_samples_Merged

library(dplyr)

# Select the top 5 genes per cluster, ensuring unique genes across clusters
top5_U <- markers_genes %>%
    arrange(p_val_adj, desc(avg_log2FC)) %>%  # Sort by p_val_adj (ascending) and avg_log2FC (descending)
    distinct(gene, .keep_all = TRUE) %>%      # Remove duplicate genes across clusters
    group_by(cluster) %>%
    top_n(-5, p_val_adj) %>%                  # Select the top 5 genes by p_val_adj
    top_n(5, avg_log2FC) %>%                  # Further select the top 5 genes by avg_log2FC
    ungroup()

# Order genes by cluster
ordered_genes <- top5_U %>%
    arrange(cluster) %>%  # Sort by cluster
    pull(gene)            # Extract the ordered gene names as a vector

# Scale data for the selected genes
alldata <- ScaleData(alldata, features = as.character(ordered_genes), assay = "RNA")

# Create the heatmap with ordered genes
heatmap_plot <- DoHeatmap(
    alldata,
    features = as.character(ordered_genes),  # Use the ordered genes
    group.by = "seurat_clusters",
    assay = "RNA",
    size = 2.5
)

# Save the heatmap as a PNG image (with 300 dpi for high-quality publication)
ggsave("top5_U_marker_genes_heatmap.png", plot = heatmap_plot, width = 12, height = 8, dpi = 300)

```


## top 5 overexpressed genes for plotting dotplot
```{r , fig.height=14, fig.width=18}
# alldata <- All_samples_Merged
#   
# DotPlot(alldata, features = rev(as.character(unique(top5_U$gene))), group.by = "seurat_clusters", assay = "RNA") + coord_flip()
# 
# # Save marker genes to a CSV file
# #write.csv(top25, file = "Top25_markers_genes_RNA_Wilcox.csv", row.names = TRUE)
# 
# 
# # Save marker genes to a CSV file
# write.csv(top5_U, file = "top5_U_markers_genes_RNA_Wilcox.csv", row.names = TRUE)

library(dplyr)

# Order genes by cluster
ordered_genes <- top5_U %>%
    arrange(cluster) %>%  # Sort by cluster
    pull(gene)            # Extract the ordered gene names as a vector

# Create the DotPlot with ordered genes
dotplot <- DotPlot(
    alldata,
    features = rev(as.character(ordered_genes)),  # Reverse the order for coord_flip()
    group.by = "seurat_clusters",
    assay = "RNA"
) + coord_flip()

# Display the DotPlot
print(dotplot)

# Save the DotPlot as a high-resolution image
ggsave("top5_U_marker_genes_dotplot.png", plot = dotplot, width = 12, height = 8, dpi = 300)

# Save the ordered marker genes to a CSV file
write.csv(top5_U %>% arrange(cluster), file = "top5_U_markers_genes_RNA_Wilcox.csv", row.names = TRUE)



```







## scPubr Group-Wise DE analysis Plot
```{r , fig.height=14, fig.width=18}

library(scCustomize)

top5_U <- top5_U[!duplicated(top5_U$gene), ]

p4 <- DotPlot_scCustom(seurat_object = All_samples_Merged, features = ordered_genes, x_lab_rotate = TRUE)

p4

p5 <- DotPlot_scCustom(seurat_object = All_samples_Merged, features = ordered_genes, flip_axes = T,
    remove_axis_titles = FALSE)

p5

p6 <- Clustered_DotPlot(seurat_object = All_samples_Merged, features = ordered_genes, k = 8)
                                

p6

```



## scPubr Group-Wise DE analysis Plot
```{r , fig.height=14, fig.width=18}

library(SCpubr)

# Create a named list of genes for each cluster
#genes <- split(top5_U$gene, top5_U$cluster)

# Generate the dot plot
p <- SCpubr::do_DotPlot(
    sample = All_samples_Merged,  # Replace with your Seurat object
    features = ordered_genes
)

# Print the plot
print(p)

ggsave("top5_U_Genes_Per_Cluster_DotPlot.png", plot = p, width = 12, height = 8, dpi = 300)





```














