1. load libraries

2. load seurat object

#Load Seurat Object
load("/home/nabbasi/isilon/To_Transfer_between_computers/23-Harmony_Integration/0-robj/5-Harmony_Integrated_All_samples_Merged_CD4Tcells_final_Resolution_Selected_0.8_ADT_Normalized_cleaned_mt.robj")



Layers(All_samples_Merged@assays$RNA)
[1] "data"   "counts"

3. clusters vs the rest


DefaultAssay(All_samples_Merged) <- "RNA"

# Normalize the RNA assay
#All_samples_Merged <- NormalizeData(All_samples_Merged, assay = "RNA", normalization.method = "LogNormalize", scale.factor = 10000)


# Compute differentiall expression
markers_genes <- FindAllMarkers(
    All_samples_Merged,
    test.use = "wilcox",
    logfc.threshold = 0.25,
    min.pct = 0.25,
    only.pos = TRUE,
    assay = "RNA"
)
Calculating cluster 0
Calculating cluster 1
Calculating cluster 2
Calculating cluster 3
Calculating cluster 4
Calculating cluster 5
Calculating cluster 6
Calculating cluster 7
Calculating cluster 8
Calculating cluster 9
Calculating cluster 10
Calculating cluster 11
Calculating cluster 12
Calculating cluster 13
# Save marker genes to a CSV file
write.csv(markers_genes, file = "Top_markers_genes_RNA_Wilcox.csv", row.names = TRUE)

4. top 25 overexpressed genes for plotting.


markers_genes %>%
    group_by(cluster) %>%
    top_n(-25, p_val_adj) %>%
    # In case of tied p-values, further select the top 25 genes by fold-change
    top_n(25, avg_log2FC) -> top25 
  top25
head(top25)

# Save to PDF with high resolution
pdf("Top25_Markers_Barplots.pdf", width = 12, height = 8)

# Set up multi-panel layout and margins
par(mfrow = c(2, 5), mar = c(4, 6, 3, 1))

# Loop over clusters
for (i in unique(top25$cluster)) {
    barplot(sort(setNames(top25$avg_log2FC, top25$gene)[top25$cluster == i], F),
        horiz = T, las = 1, main = paste0(i, " vs. rest"), border = "white", yaxs = "i"
    )
    abline(v = c(0, 0.25), lty = c(1, 2))
}
# Close the PDF device
dev.off()
null device 
          1 
# Open a PNG device to save the plot
png("Top25_Markers_Barplots.png", width = 2400, height = 1600, res = 300)

# Create the bar plots
par(mfrow = c(2, 5), mar = c(4, 6, 3, 1))
for (i in unique(top25$cluster)) {
    barplot(sort(setNames(top25$avg_log2FC, top25$gene)[top25$cluster == i], F),
        horiz = T, las = 1, main = paste0(i, " vs. rest"), border = "white", yaxs = "i"
    )
    abline(v = c(0, 0.25), lty = c(1, 2))
}

# Close the PNG device
dev.off()
null device 
          1 
# Same plotting commands
P1 <- par(mfrow = c(2, 5), mar = c(4, 6, 3, 1))

for (i in unique(top25$cluster)) {
    barplot(sort(setNames(top25$avg_log2FC, top25$gene)[top25$cluster == i], F),
        horiz = T, las = 1, main = paste0(i, " vs. rest"), border = "white", yaxs = "i"
    )
    abline(v = c(0, 0.25), lty = c(1, 2))
}



P1
$mfrow
[1] 1 1

$mar
[1] 5.1 4.1 2.1 2.1

5. top 5 overexpressed genes for plotting heatmap

alldata <- All_samples_Merged
  
markers_genes %>%
    group_by(cluster) %>%
    slice_min(p_val_adj, n = 5, with_ties = FALSE) -> top5
# create a scale.data slot for the selected genes
alldata <- ScaleData(alldata, features = as.character(unique(top5$gene)), assay = "RNA")
Centering and scaling data matrix

  |                                                                                                                
  |                                                                                                          |   0%
  |                                                                                                                
  |==========================================================================================================| 100%
DoHeatmap(alldata, features = as.character(unique(top5$gene)), group.by = "seurat_clusters", assay = "RNA")

# Save the heatmap as a PNG image (with 300 dpi for high-quality publication)
ggsave("top5_marker_genes_heatmap.png", plot = heatmap_plot, width = 8, height = 6, dpi = 300)
Erreur dans plot_theme(plot) : objet 'heatmap_plot' introuvable

6. top 5 overexpressed genes for plotting dotplot

alldata <- All_samples_Merged
  
DotPlot(alldata, features = rev(as.character(unique(top5$gene))), group.by = "seurat_clusters", assay = "RNA") + coord_flip()

NA
NA

7. scPubr Group-Wise DE analysis Plot

p2 <- SCpubr::do_GroupwiseDEPlot(sample = alldata,
                                de_genes = markers_genes,
                                top_genes = 5)
Avis : aucun argument trouvé pour min ; Inf est renvoyéErreur dans if (dmax - dmin < eps) { : 
  valeur manquante là où TRUE / FALSE est requis

8. scPubr Group-Wise DE analysis Plot


library(scCustomize)

top5 <- top5[!duplicated(top5$gene), ]

p4 <- DotPlot_scCustom(seurat_object = All_samples_Merged, features = top5$gene, x_lab_rotate = TRUE)

p4


p5 <- DotPlot_scCustom(seurat_object = All_samples_Merged, features = top5$gene, flip_axes = T,
    remove_axis_titles = FALSE)

p5


p6 <- Clustered_DotPlot(seurat_object = All_samples_Merged, features = top5$gene, k = 8)

                                

p6
[[1]]

[[2]]

7. scPubr Group-Wise DE analysis Plot


library(SCpubr)

# Create a named list of genes for each cluster
genes <- split(top5$gene, top5$cluster)

# Generate the dot plot
p <- SCpubr::do_DotPlot(
    sample = All_samples_Merged,  # Replace with your Seurat object
    features = genes
)

# Print the plot
print(p)


ggsave("Top5_Genes_Per_Cluster_DotPlot.png", plot = p, width = 12, height = 8, dpi = 300)

top 5 overexpressed genes for plotting heatmap


#alldata <- All_samples_Merged

library(dplyr)

# Select the top 5 genes per cluster, ensuring unique genes across clusters
top5_U <- markers_genes %>%
    arrange(p_val_adj, desc(avg_log2FC)) %>%  # Sort by p_val_adj (ascending) and avg_log2FC (descending)
    distinct(gene, .keep_all = TRUE) %>%      # Remove duplicate genes across clusters
    group_by(cluster) %>%
    top_n(-5, p_val_adj) %>%                  # Select the top 5 genes by p_val_adj
    top_n(5, avg_log2FC) %>%                  # Further select the top 5 genes by avg_log2FC
    ungroup()

# Order genes by cluster
ordered_genes <- top5_U %>%
    arrange(cluster) %>%  # Sort by cluster
    pull(gene)            # Extract the ordered gene names as a vector

# Scale data for the selected genes
alldata <- ScaleData(alldata, features = as.character(ordered_genes), assay = "RNA")
Centering and scaling data matrix

  |                                                                                                         
  |                                                                                                   |   0%
  |                                                                                                         
  |===================================================================================================| 100%
# Create the heatmap with ordered genes
heatmap_plot <- DoHeatmap(
    alldata,
    features = as.character(ordered_genes),  # Use the ordered genes
    group.by = "seurat_clusters",
    assay = "RNA",
    size = 2.5
)

# Save the heatmap as a PNG image (with 300 dpi for high-quality publication)
ggsave("top5_U_marker_genes_heatmap.png", plot = heatmap_plot, width = 12, height = 8, dpi = 300)

top 5 overexpressed genes for plotting dotplot

# alldata <- All_samples_Merged
#   
# DotPlot(alldata, features = rev(as.character(unique(top5_U$gene))), group.by = "seurat_clusters", assay = "RNA") + coord_flip()
# 
# # Save marker genes to a CSV file
# #write.csv(top25, file = "Top25_markers_genes_RNA_Wilcox.csv", row.names = TRUE)
# 
# 
# # Save marker genes to a CSV file
# write.csv(top5_U, file = "top5_U_markers_genes_RNA_Wilcox.csv", row.names = TRUE)

library(dplyr)

# Order genes by cluster
ordered_genes <- top5_U %>%
    arrange(cluster) %>%  # Sort by cluster
    pull(gene)            # Extract the ordered gene names as a vector

# Create the DotPlot with ordered genes
dotplot <- DotPlot(
    alldata,
    features = rev(as.character(ordered_genes)),  # Reverse the order for coord_flip()
    group.by = "seurat_clusters",
    assay = "RNA"
) + coord_flip()

# Display the DotPlot
print(dotplot)


# Save the DotPlot as a high-resolution image
ggsave("top5_U_marker_genes_dotplot.png", plot = dotplot, width = 12, height = 8, dpi = 300)

# Save the ordered marker genes to a CSV file
write.csv(top5_U %>% arrange(cluster), file = "top5_U_markers_genes_RNA_Wilcox.csv", row.names = TRUE)

scPubr Group-Wise DE analysis Plot


library(scCustomize)

top5_U <- top5_U[!duplicated(top5_U$gene), ]

p4 <- DotPlot_scCustom(seurat_object = All_samples_Merged, features = ordered_genes, x_lab_rotate = TRUE)

p4


p5 <- DotPlot_scCustom(seurat_object = All_samples_Merged, features = ordered_genes, flip_axes = T,
    remove_axis_titles = FALSE)

p5


p6 <- Clustered_DotPlot(seurat_object = All_samples_Merged, features = ordered_genes, k = 8)

                                

p6
[[1]]

[[2]]

scPubr Group-Wise DE analysis Plot


library(SCpubr)

# Create a named list of genes for each cluster
#genes <- split(top5_U$gene, top5_U$cluster)

# Generate the dot plot
p <- SCpubr::do_DotPlot(
    sample = All_samples_Merged,  # Replace with your Seurat object
    features = ordered_genes
)

# Print the plot
print(p)

ggsave("top5_U_Genes_Per_Cluster_DotPlot.png", plot = p, width = 12, height = 8, dpi = 300)
---
title: "Wilcox Analysis RNA Top Markers"
author: "Nasir Mahmood Abbasi"
date: "`r Sys.Date()`"
output:
  html_notebook:
    toc: yes
    toc_float: yes
    toc_collapsed: yes
  word_document:
    toc: yes
  html_document:
    toc: yes
    df_print: paged
  pdf_document:
    toc: yes
---

# 1. load libraries
```{r setup, include=FALSE}

suppressPackageStartupMessages({
    library(Seurat)
    library(dplyr)
    library(patchwork)
    library(ggplot2)
    library(pheatmap)
    library(enrichR)
    library(Matrix)
    library(edgeR)
    library(MAST)
})

```


# 2. load seurat object
```{r load_seurat}
#Load Seurat Object
load("/home/nabbasi/isilon/To_Transfer_between_computers/23-Harmony_Integration/0-robj/5-Harmony_Integrated_All_samples_Merged_CD4Tcells_final_Resolution_Selected_0.8_ADT_Normalized_cleaned_mt.robj")



Layers(All_samples_Merged@assays$RNA)





```

# 3. clusters vs the rest
```{r , fig.height=14, fig.width=18}

DefaultAssay(All_samples_Merged) <- "RNA"

# Normalize the RNA assay
#All_samples_Merged <- NormalizeData(All_samples_Merged, assay = "RNA", normalization.method = "LogNormalize", scale.factor = 10000)


# Compute differentiall expression
markers_genes <- FindAllMarkers(
    All_samples_Merged,
    test.use = "wilcox",
    logfc.threshold = 0.25,
    min.pct = 0.25,
    only.pos = TRUE,
    assay = "RNA"
)

# Save marker genes to a CSV file
write.csv(markers_genes, file = "Top_markers_genes_RNA_Wilcox.csv", row.names = TRUE)

```
# 4. top 25 overexpressed genes for plotting.
```{r , fig.height=14, fig.width=18}

markers_genes %>%
    group_by(cluster) %>%
    top_n(-25, p_val_adj) %>%
    # In case of tied p-values, further select the top 25 genes by fold-change
    top_n(25, avg_log2FC) -> top25 
  top25
head(top25)

# Save to PDF with high resolution
pdf("Top25_Markers_Barplots.pdf", width = 12, height = 8)

# Set up multi-panel layout and margins
par(mfrow = c(2, 5), mar = c(4, 6, 3, 1))

# Loop over clusters
for (i in unique(top25$cluster)) {
    barplot(sort(setNames(top25$avg_log2FC, top25$gene)[top25$cluster == i], F),
        horiz = T, las = 1, main = paste0(i, " vs. rest"), border = "white", yaxs = "i"
    )
    abline(v = c(0, 0.25), lty = c(1, 2))
}
# Close the PDF device
dev.off()

# Open a PNG device to save the plot
png("Top25_Markers_Barplots.png", width = 2400, height = 1600, res = 300)

# Create the bar plots
par(mfrow = c(2, 5), mar = c(4, 6, 3, 1))
for (i in unique(top25$cluster)) {
    barplot(sort(setNames(top25$avg_log2FC, top25$gene)[top25$cluster == i], F),
        horiz = T, las = 1, main = paste0(i, " vs. rest"), border = "white", yaxs = "i"
    )
    abline(v = c(0, 0.25), lty = c(1, 2))
}

# Close the PNG device
dev.off()

# Same plotting commands
P1 <- par(mfrow = c(2, 5), mar = c(4, 6, 3, 1))
for (i in unique(top25$cluster)) {
    barplot(sort(setNames(top25$avg_log2FC, top25$gene)[top25$cluster == i], F),
        horiz = T, las = 1, main = paste0(i, " vs. rest"), border = "white", yaxs = "i"
    )
    abline(v = c(0, 0.25), lty = c(1, 2))
}


P1






```



# 5. top 5 overexpressed genes for plotting heatmap
```{r , fig.height=14, fig.width=18}
alldata <- All_samples_Merged
  
markers_genes %>%
    group_by(cluster) %>%
    slice_min(p_val_adj, n = 5, with_ties = FALSE) -> top5
# create a scale.data slot for the selected genes
alldata <- ScaleData(alldata, features = as.character(unique(top5$gene)), assay = "RNA")
heatmap_plot <- DoHeatmap(alldata, features = as.character(unique(top5$gene)), group.by = "seurat_clusters", assay = "RNA", size = 2.5)

# Save the heatmap as a PNG image (with 300 dpi for high-quality publication)
ggsave("top5_marker_genes_heatmap.png", plot = heatmap_plot, width = 12, height = 8, dpi = 300)


```


# 6. top 5 overexpressed genes for plotting dotplot
```{r , fig.height=14, fig.width=18}
alldata <- All_samples_Merged
  
DotPlot(alldata, features = rev(as.character(unique(top5$gene))), group.by = "seurat_clusters", assay = "RNA") + coord_flip()

# Save marker genes to a CSV file
write.csv(top25, file = "Top25_markers_genes_RNA_Wilcox.csv", row.names = TRUE)


# Save marker genes to a CSV file
write.csv(top5, file = "Top5_markers_genes_RNA_Wilcox.csv", row.names = TRUE)
```

# 7. scPubr Group-Wise DE analysis Plot
```{r , fig.height=14, fig.width=18}

# library(SCpubr)
# 
# p1 <- SCpubr::do_GroupwiseDEPlot(sample = alldata,
#                                 de_genes = markers_genes,
#                                 min.cutoff = 1)
# 
# p1
# 
# p2 <- SCpubr::do_GroupwiseDEPlot(sample = alldata,
#                                 de_genes = markers_genes,
#                                 top_genes = 5)
# 
# p2
# 
# p3 <- SCpubr::do_GroupwiseDEPlot(sample = alldata,
#                                 de_genes = markers_genes,
#                                 group.by = c("seurat_clusters", 
#                                              "cell_line", 
#                                              "Patient_origin"),
#                                 row_title_expression = c("",
#                                                          "cell_line",
#                                                          "Donor"))
#       
# 
# p3

```


# 8. scPubr Group-Wise DE analysis Plot
```{r , fig.height=14, fig.width=18}

library(scCustomize)

top5 <- top5[!duplicated(top5$gene), ]

p4 <- DotPlot_scCustom(seurat_object = All_samples_Merged, features = top5$gene, x_lab_rotate = TRUE)

p4

p5 <- DotPlot_scCustom(seurat_object = All_samples_Merged, features = top5$gene, flip_axes = T,
    remove_axis_titles = FALSE)

p5

p6 <- Clustered_DotPlot(seurat_object = All_samples_Merged, features = top5$gene, k = 8)
                                

p6

```
# 7. scPubr Group-Wise DE analysis Plot
```{r , fig.height=14, fig.width=18}

library(SCpubr)

# Create a named list of genes for each cluster
genes <- split(top5$gene, top5$cluster)

# Generate the dot plot
p <- SCpubr::do_DotPlot(
    sample = All_samples_Merged,  # Replace with your Seurat object
    features = genes
)

# Print the plot
print(p)

ggsave("Top5_Genes_Per_Cluster_DotPlot.png", plot = p, width = 12, height = 8, dpi = 300)





```
## top 5 overexpressed genes for plotting heatmap
```{r , fig.height=14, fig.width=18}

#alldata <- All_samples_Merged

library(dplyr)

# Select the top 5 genes per cluster, ensuring unique genes across clusters
top5_U <- markers_genes %>%
    arrange(p_val_adj, desc(avg_log2FC)) %>%  # Sort by p_val_adj (ascending) and avg_log2FC (descending)
    distinct(gene, .keep_all = TRUE) %>%      # Remove duplicate genes across clusters
    group_by(cluster) %>%
    top_n(-5, p_val_adj) %>%                  # Select the top 5 genes by p_val_adj
    top_n(5, avg_log2FC) %>%                  # Further select the top 5 genes by avg_log2FC
    ungroup()

# Order genes by cluster
ordered_genes <- top5_U %>%
    arrange(cluster) %>%  # Sort by cluster
    pull(gene)            # Extract the ordered gene names as a vector

# Scale data for the selected genes
alldata <- ScaleData(alldata, features = as.character(ordered_genes), assay = "RNA")

# Create the heatmap with ordered genes
heatmap_plot <- DoHeatmap(
    alldata,
    features = as.character(ordered_genes),  # Use the ordered genes
    group.by = "seurat_clusters",
    assay = "RNA",
    size = 2.5
)

# Save the heatmap as a PNG image (with 300 dpi for high-quality publication)
ggsave("top5_U_marker_genes_heatmap.png", plot = heatmap_plot, width = 12, height = 8, dpi = 300)

```


## top 5 overexpressed genes for plotting dotplot
```{r , fig.height=14, fig.width=18}
# alldata <- All_samples_Merged
#   
# DotPlot(alldata, features = rev(as.character(unique(top5_U$gene))), group.by = "seurat_clusters", assay = "RNA") + coord_flip()
# 
# # Save marker genes to a CSV file
# #write.csv(top25, file = "Top25_markers_genes_RNA_Wilcox.csv", row.names = TRUE)
# 
# 
# # Save marker genes to a CSV file
# write.csv(top5_U, file = "top5_U_markers_genes_RNA_Wilcox.csv", row.names = TRUE)

library(dplyr)

# Order genes by cluster
ordered_genes <- top5_U %>%
    arrange(cluster) %>%  # Sort by cluster
    pull(gene)            # Extract the ordered gene names as a vector

# Create the DotPlot with ordered genes
dotplot <- DotPlot(
    alldata,
    features = rev(as.character(ordered_genes)),  # Reverse the order for coord_flip()
    group.by = "seurat_clusters",
    assay = "RNA"
) + coord_flip()

# Display the DotPlot
print(dotplot)

# Save the DotPlot as a high-resolution image
ggsave("top5_U_marker_genes_dotplot.png", plot = dotplot, width = 12, height = 8, dpi = 300)

# Save the ordered marker genes to a CSV file
write.csv(top5_U %>% arrange(cluster), file = "top5_U_markers_genes_RNA_Wilcox.csv", row.names = TRUE)



```







## scPubr Group-Wise DE analysis Plot
```{r , fig.height=14, fig.width=18}

library(scCustomize)

top5_U <- top5_U[!duplicated(top5_U$gene), ]

p4 <- DotPlot_scCustom(seurat_object = All_samples_Merged, features = ordered_genes, x_lab_rotate = TRUE)

p4

p5 <- DotPlot_scCustom(seurat_object = All_samples_Merged, features = ordered_genes, flip_axes = T,
    remove_axis_titles = FALSE)

p5

p6 <- Clustered_DotPlot(seurat_object = All_samples_Merged, features = ordered_genes, k = 8)
                                

p6

```



## scPubr Group-Wise DE analysis Plot
```{r , fig.height=14, fig.width=18}

library(SCpubr)

# Create a named list of genes for each cluster
#genes <- split(top5_U$gene, top5_U$cluster)

# Generate the dot plot
p <- SCpubr::do_DotPlot(
    sample = All_samples_Merged,  # Replace with your Seurat object
    features = ordered_genes
)

# Print the plot
print(p)

ggsave("top5_U_Genes_Per_Cluster_DotPlot.png", plot = p, width = 12, height = 8, dpi = 300)





```














