load libraries————————————
1. Read Seurat object (STCAT annotated and Cleaned)
All_samples_Merged <- readRDS("../All_samples_Merged_with_STCAT_and_cleaned.rds")
pbmc <- All_samples_Merged
rm(All_samples_Merged)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 8412352 449.3 15367708 820.8 11293490 603.2
Vcells 1227843168 9367.7 1529678067 11670.6 1244138755 9492.1
2. Visualizing Clusters
library(Seurat)
library(SeuratExtend)
Idents(pbmc) <- "Prediction"
# Visualizing cell clusters using DimPlot2
DimPlot2(pbmc, theme = theme_umap_arrows())

3. Visualizing Clusters
library(Seurat)
library(SeuratExtend)
Idents(pbmc) <- "Prediction"
# Visualizing cell Annotations using DimPlot2
DimPlot2(pbmc, theme = theme_umap_arrows())

Idents(pbmc) <- "orig.ident"
# Visualizing cell Annotations using DimPlot2
DimPlot2(pbmc, theme = theme_umap_arrows())

Idents(pbmc) <- "seurat_clusters"
# Visualizing cell Annotations using DimPlot2
DimPlot2(pbmc, theme = theme_umap_arrows())

4. Analyzing Cluster Distribution
# Cluster distribution bar plot
ClusterDistrBar(pbmc$orig.ident, pbmc$seurat_clusters)

# Annotations distribution bar plot
ClusterDistrBar(pbmc$orig.ident, pbmc$Prediction)

# Annotations distribution bar plot
ClusterDistrBar(pbmc$Patient_origin, pbmc$Prediction)

5. Marker Gene Analysis with Heatmap
# To examine the marker genes of each cluster and visualize them using a heatmap:
# Calculating z-scores for variable features
# genes.zscore1 <- CalcStats(
# pbmc,
# features = VariableFeatures(pbmc),
# group.by = "orig.ident",
# order = "p",
# n = 5)
# Displaying heatmap
Heatmap(genes.zscore1, lab_fill = "zscore")
Using id as id variables

# Calculating z-scores for variable features
# genes.zscore2 <- CalcStats(
# pbmc,
# features = VariableFeatures(pbmc),
# group.by = "seurat_clusters",
# order = "p",
# n = 5)
# Displaying heatmap
Heatmap(genes.zscore2, lab_fill = "zscore")
Using id as id variables

# Calculating z-scores for variable features
# genes.zscore3 <- CalcStats(
# pbmc,
# features = VariableFeatures(pbmc),
# group.by = "Prediction",
# order = "p",
# n = 5)
#
# Displaying heatmap
Heatmap(genes.zscore3, lab_fill = "zscore")
Using id as id variables

gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 11061468 590.8 19580058 1045.7 19580058 1045.7
Vcells 1243153300 9484.6 2386909928 18210.7 2386909513 18210.7
6. Enhanced Dot Plots (New in v1.1.0)
Idents(pbmc) <- "orig.ident"
# Create grouped features
grouped_features <- list(
"B_cell_markers" = c("MS4A1", "CD79A"),
"T_cell_markers" = c("CD3D", "CD8A", "IL7R"),
"Myeloid_markers" = c("CD14", "FCGR3A", "S100A8")
)
DotPlot2(pbmc, features = grouped_features)

Idents(pbmc) <- "seurat_clusters"
# Create grouped features
grouped_features <- list(
"B_cell_markers" = c("MS4A1", "CD79A"),
"T_cell_markers" = c("CD3D", "CD8A", "IL7R"),
"Myeloid_markers" = c("CD14", "FCGR3A", "S100A8")
)
DotPlot2(pbmc, features = grouped_features)

Idents(pbmc) <- "Prediction"
# Create grouped features
grouped_features <- list(
"B_cell_markers" = c("MS4A1", "CD79A"),
"T_cell_markers" = c("CD3D", "CD8A", "IL7R"),
"Myeloid_markers" = c("CD14", "FCGR3A", "S100A8")
)
DotPlot2(pbmc, features = grouped_features)

##. Enhanced Dot Plots (New in v1.1.0)
# Create grouped features
grouped_features_T <- list(
"Naive_T" = c("CCR7", "SELL", "LEF1", "TCF7"),
"Tcm" = c("CCR7", "IL7R", "CD62L"),
"Tem" = c("GZMK", "CXCR3", "KLRG1"),
"Temra" = c("GZMB", "PRF1", "KLRG1"),
"Treg" = c("FOXP3", "IL2RA", "IKZF2", "TIGIT"),
"Tfh" = c("CXCR5", "BCL6", "PDCD1", "ICOS"),
"Th17" = c("RORC", "IL17A", "IL23R"),
"Tex" = c("PDCD1", "TOX", "LAG3", "CTLA4"),
"Trm" = c("ITGAE", "CD69", "ZNF683"),
"Tisg" = c("ISG15", "IFI6", "MX1", "IFIT3"),
"Proliferation" = c("MKI67", "TOP2A", "STMN1"),
"Cell_Death" = c("BAX", "CASP3", "DFFA", "BCL2L11"),
"Adhesion" = c("ITGAL", "ICAM1", "CD44", "SELL"),
"Activated" = c("CD38", "HLA-DRA", "TNFRSF9", "IL2RA")
)
Idents(pbmc) <- "orig.ident"
DotPlot2(pbmc, features = grouped_features_T)
Warning: The following requested variables were not found: CD62LWarning: Removing duplicate features (keeping first occurrence): CCR7, KLRG1, PDCD1, SELL, IL2RA

Idents(pbmc) <- "seurat_clusters"
DotPlot2(pbmc, features = grouped_features_T)
Warning: The following requested variables were not found: CD62LWarning: Removing duplicate features (keeping first occurrence): CCR7, KLRG1, PDCD1, SELL, IL2RA

Idents(pbmc) <- "Prediction"
DotPlot2(pbmc, features = grouped_features_T)
Warning: The following requested variables were not found: CD62LWarning: Removing duplicate features (keeping first occurrence): CCR7, KLRG1, PDCD1, SELL, IL2RA

7. Analyzing Cluster Distribution
# Specifying genes and cells of interest
genes <- c("SELL", "IL2RA", "CD8A", "TOP2A")
cells <- WhichCells(pbmc, idents = c("CD4 Tn", "CD4 Treg", "CD4 Trm", "CD4 proliferation"))
# Violin plot with statistical analysis
VlnPlot2(
pbmc,
features = genes,
group.by = "Prediction",
cells = cells,
stat.method = "wilcox.test")

Displaying three markers on a single UMAP
FeaturePlot3(pbmc, feature.1 = "CD3D", feature.2 = "CD7", feature.3 = "CD8A", pt.size = 1)


FeaturePlot3(pbmc, feature.1 = "IL2RA", feature.2 = "CD69", feature.3 = "FOXP3", pt.size = 1)

FeaturePlot3(pbmc, feature.1 = "LAG3", feature.2 = "PDCD1", feature.3 = "CTLA4", pt.size = 1)

8. Create a basic volcano plot comparing two cell types:
Idents(pbmc) <- "Prediction"
VolcanoPlot(pbmc,
ident.1 = "CD4 Tn",
ident.2 = "CD4 Trm")
Warning: sparse->dense coercion: allocating vector of size 9.6 GiB
9. Conducting Geneset Enrichment Analysis (GSEA)
go_zscore <- CalcStats(
matr,
f = pbmc$orig.ident,
order = "p",
n = 3)
Heatmap(go_zscore, lab_fill = "zscore")
Using id as id variables

go_zscore <- CalcStats(
matr,
f = pbmc$seurat_clusters,
order = "p",
n = 3)
Heatmap(go_zscore, lab_fill = "zscore")
Using id as id variables

Conducting Geneset Enrichment Analysis (GSEA)
go_zscore <- CalcStats(
matr,
f = pbmc$Prediction,
order = "p",
n = 3)
Heatmap(go_zscore, lab_fill = "zscore")
Using id as id variables

9. Importing and Visualizing SCENIC Analysis
# Downloading a pre-computed SCENIC loom file
scenic_loom_path <- file.path(tempdir(), "pyscenic_integrated-output.loom")
download.file("https://zenodo.org/records/10944066/files/pbmc3k_small_pyscenic_integrated-output.loom", scenic_loom_path, mode = "wb")
trying URL 'https://zenodo.org/records/10944066/files/pbmc3k_small_pyscenic_integrated-output.loom'
Content type 'application/octet-stream' length 33355070 bytes (31.8 MB)
==================================================
downloaded 31.8 MB
# Importing SCENIC Loom Files into Seurat
pbmc <- ImportPyscenicLoom(scenic_loom_path, seu = pbmc)
Warning: Skipping validation step, some fields are not populatedError in ImportPyscenicLoom(scenic_loom_path, seu = pbmc) :
Loom file and Seurat object have different cell numbers
Displaying three markers on a single UMAP
library(Seurat)
library(SeuratExtend)
FeaturePlot3(pbmc, feature.1 = "CD3D", feature.2 = "CD7", feature.3 = "CD8A", pt.size = 1, dark.theme = TRUE)


FeaturePlot3(pbmc, feature.1 = "IL2RA", feature.2 = "CD69", feature.3 = "FOXP3", pt.size = 1, dark.theme = TRUE)

FeaturePlot3(pbmc, feature.1 = "LAG3", feature.2 = "PDCD1", feature.3 = "CTLA4", pt.size = 1, dark.theme = TRUE)

10. New ClusterDistrPlot Function
pbmc$condition <- ifelse(
pbmc$orig.ident %in% paste0("L", 1:7), # Malignant samples L1 to L7
"Malignant",
ifelse(
pbmc$orig.ident %in% c("CD4T_lab", "CD4T_10x"), # Healthy samples
"Healthy",
"Other" # for any other samples, if present
)
)
# Convert to factor with Malignant first, Healthy second
pbmc$condition <- factor(pbmc$condition, levels = c("Malignant", "Healthy"))
# Compare cluster distribution between conditions
ClusterDistrPlot(
origin = pbmc$orig.ident,
cluster = pbmc$Prediction,
condition = pbmc$condition
)
Attaching package: ‘rlist’
The following object is masked from ‘package:S4Vectors’:
List
The 'I want hue' color presets were generated from: https://medialab.github.io/iwanthue/
This message is shown once per session

11. Enhanced Log Fold Change Options in Differential Analysis
Plots
# Using log base 2 for fold change calculations in WaterfallPlot
WaterfallPlot(
pbmc,
group.by = "Prediction",
features = VariableFeatures(pbmc)[1:80],
ident.1 = "CD4 Tn",
ident.2 = "CD4 Trm",
length = "logFC",
log.base = "2", # Use log2 instead of natural log
top.n = 20)
Warning: no non-missing arguments to min; returning InfWarning: no non-missing arguments to max; returning -InfData range detected as 0-1. Using pseudocount = 0.01 for logFC calculation.
Warning: no non-missing arguments to max; returning -Inf
Error in `fortify()`:
! `data` must be a <data.frame>, or an object coercible by `fortify()`, or a valid
<data.frame>-like object coercible by `as.data.frame()`.
Caused by error in `.prevalidate_data_frame_like_object()`:
! `dim(data)` must return an <integer> of length 2.
Run `]8;;x-r-run:rlang::last_trace()rlang::last_trace()]8;;` to see where the error occurred.
Save the RDS after changes
saveRDS(All_samples_Merged, file = "STCAT_Annotation/All_samples_Merged_with_STCAT_and_cleaned.rds")

---
title: "Visualization after STCAT and Cleaning using SeuratExtend"
author: Nasir Mahmood Abbasi
date: "`r Sys.Date()`"
output:
  #rmdformats::readthedown
  html_notebook:
    toc: true
    toc_float: true
    toc_collapsed: true
---



## load libraries------------------------------------
```{r setup, include=FALSE}

library(Seurat)
library(SeuratExtend)
library(SeuratExtendData)
library(monocle3)
library(SeuratWrappers)
library(harmony)


# Extra libraries
library(dplyr)
library(pheatmap)
library(ggplot2)
library(SCpubr)

```


# 1. Read Seurat object (STCAT annotated and Cleaned)
```{r loadSeurat}

All_samples_Merged <- readRDS("../All_samples_Merged_with_STCAT_and_cleaned.rds")

pbmc <- All_samples_Merged

rm(All_samples_Merged)

gc()
```



# 2. Visualizing Clusters
```{r, fig.height=6, fig.width=8}

library(Seurat)
library(SeuratExtend)

Idents(pbmc) <- "Prediction"

# Visualizing cell clusters using DimPlot2
DimPlot2(pbmc, theme = theme_umap_arrows())
```


# 3. Visualizing Clusters
```{r, fig.height=6, fig.width=8}

library(Seurat)
library(SeuratExtend)

Idents(pbmc) <- "Prediction"

# Visualizing cell Annotations using DimPlot2
DimPlot2(pbmc, theme = theme_umap_arrows())

Idents(pbmc) <- "orig.ident"

# Visualizing cell Annotations using DimPlot2
DimPlot2(pbmc, theme = theme_umap_arrows())

Idents(pbmc) <- "seurat_clusters"

# Visualizing cell Annotations using DimPlot2
DimPlot2(pbmc, theme = theme_umap_arrows())
```


# 4. Analyzing Cluster Distribution
```{r, fig.height=6, fig.width=8}

# Cluster distribution bar plot
ClusterDistrBar(pbmc$orig.ident, pbmc$seurat_clusters)

# Annotations distribution bar plot
ClusterDistrBar(pbmc$orig.ident, pbmc$Prediction)

# Annotations distribution bar plot
ClusterDistrBar(pbmc$Patient_origin, pbmc$Prediction)

```

# 5. Marker Gene Analysis with Heatmap
```{r, fig.height=12, fig.width=12}

# To examine the marker genes of each cluster and visualize them using a heatmap:

# Calculating z-scores for variable features
# genes.zscore1 <- CalcStats(
#   pbmc,
#   features = VariableFeatures(pbmc),
#   group.by = "orig.ident",
#   order = "p",
#   n = 5)
  
# Displaying heatmap
Heatmap(genes.zscore1, lab_fill = "zscore")


# Calculating z-scores for variable features
# genes.zscore2 <- CalcStats(
#   pbmc,
#   features = VariableFeatures(pbmc),
#   group.by = "seurat_clusters",
#   order = "p",
#   n = 5)
  
# Displaying heatmap
Heatmap(genes.zscore2, lab_fill = "zscore")


# Calculating z-scores for variable features
# genes.zscore3 <- CalcStats(
#   pbmc,
#   features = VariableFeatures(pbmc),
#   group.by = "Prediction",
#   order = "p",
#   n = 5)
#   
# Displaying heatmap
Heatmap(genes.zscore3, lab_fill = "zscore")

gc()
```

# 6. Enhanced Dot Plots (New in v1.1.0)
```{r, fig.height=6, fig.width=8}

Idents(pbmc) <- "orig.ident"

# Create grouped features
grouped_features <- list(
  "B_cell_markers" = c("MS4A1", "CD79A"),
  "T_cell_markers" = c("CD3D", "CD8A", "IL7R"),
  "Myeloid_markers" = c("CD14", "FCGR3A", "S100A8")
)

DotPlot2(pbmc, features = grouped_features)

Idents(pbmc) <- "seurat_clusters"

# Create grouped features
grouped_features <- list(
  "B_cell_markers" = c("MS4A1", "CD79A"),
  "T_cell_markers" = c("CD3D", "CD8A", "IL7R"),
  "Myeloid_markers" = c("CD14", "FCGR3A", "S100A8")
)

DotPlot2(pbmc, features = grouped_features)

Idents(pbmc) <- "Prediction"

# Create grouped features
grouped_features <- list(
  "B_cell_markers" = c("MS4A1", "CD79A"),
  "T_cell_markers" = c("CD3D", "CD8A", "IL7R"),
  "Myeloid_markers" = c("CD14", "FCGR3A", "S100A8")
)

DotPlot2(pbmc, features = grouped_features)

```

##. Enhanced Dot Plots (New in v1.1.0)
```{r, fig.height=12, fig.width=12}

# Create grouped features
grouped_features_T <- list(
  "Naive_T"       = c("CCR7", "SELL", "LEF1", "TCF7"),
  "Tcm"           = c("CCR7", "IL7R", "CD62L"),
  "Tem"           = c("GZMK", "CXCR3", "KLRG1"),
  "Temra"         = c("GZMB", "PRF1", "KLRG1"),
  "Treg"          = c("FOXP3", "IL2RA", "IKZF2", "TIGIT"),
  "Tfh"           = c("CXCR5", "BCL6", "PDCD1", "ICOS"),
  "Th17"          = c("RORC", "IL17A", "IL23R"),
  "Tex"           = c("PDCD1", "TOX", "LAG3", "CTLA4"),
  "Trm"           = c("ITGAE", "CD69", "ZNF683"),
  "Tisg"          = c("ISG15", "IFI6", "MX1", "IFIT3"),
  "Proliferation" = c("MKI67", "TOP2A", "STMN1"),
  "Cell_Death"    = c("BAX", "CASP3", "DFFA", "BCL2L11"),
  "Adhesion"      = c("ITGAL", "ICAM1", "CD44", "SELL"),
  "Activated"     = c("CD38", "HLA-DRA", "TNFRSF9", "IL2RA")
)


Idents(pbmc) <- "orig.ident"
DotPlot2(pbmc, features = grouped_features_T)

Idents(pbmc) <- "seurat_clusters"

DotPlot2(pbmc, features = grouped_features_T)

Idents(pbmc) <- "Prediction"

DotPlot2(pbmc, features = grouped_features_T)

```

# 7. Analyzing Cluster Distribution
```{r, fig.height=6, fig.width=8}

# Specifying genes and cells of interest
genes <- c("SELL", "IL2RA", "CD8A", "TOP2A")
cells <- WhichCells(pbmc, idents = c("CD4 Tn", "CD4 Treg", "CD4 Trm", "CD4 proliferation"))

# Violin plot with statistical analysis
VlnPlot2(
  pbmc,
  features = genes,
  group.by = "Prediction",
  cells = cells,
  stat.method = "wilcox.test")

```

## Displaying three markers on a single UMAP
```{r, fig.height=6, fig.width=8}
FeaturePlot3(pbmc, feature.1 = "CD3D", feature.2 = "CD7", feature.3 = "CD8A", pt.size = 1)

FeaturePlot3(pbmc, feature.1 = "IL2RA", feature.2 = "CD69", feature.3 = "FOXP3", pt.size = 1)

FeaturePlot3(pbmc, feature.1 = "LAG3", feature.2 = "PDCD1", feature.3 = "CTLA4", pt.size = 1)

```


# 8. Create a basic volcano plot comparing two cell types:
```{r, fig.height=6, fig.width=8}
Idents(pbmc) <- "Prediction"

VolcanoPlot(pbmc, 
            ident.1 = "CD4 Tn",
            ident.2 = "CD4 Trm") # not possible to run as it requires alot of memory, Try it on R server
```


# 9. Conducting Geneset Enrichment Analysis (GSEA)
```{r, fig.height=8, fig.width=10}
Idents(pbmc) <- "orig.ident"

options(spe = "human")
pbmc <- GeneSetAnalysisGO(pbmc, parent = "immune_system_process", n.min = 5)
matr <- RenameGO(pbmc@misc$AUCell$GO$immune_system_process)
go_zscore <- CalcStats(
  matr,
  f = pbmc$orig.ident,
  order = "p",
  n = 3)
Heatmap(go_zscore, lab_fill = "zscore")


go_zscore <- CalcStats(
  matr,
  f = pbmc$seurat_clusters,
  order = "p",
  n = 3)
Heatmap(go_zscore, lab_fill = "zscore")




```
## Conducting Geneset Enrichment Analysis (GSEA)
```{r, fig.height=10, fig.width=12}
go_zscore <- CalcStats(
  matr,
  f = pbmc$Prediction,
  order = "p",
  n = 3)
Heatmap(go_zscore, lab_fill = "zscore")


```


# 9. Importing and Visualizing SCENIC Analysis
```{r, fig.height=6, fig.width=8}
# Downloading a pre-computed SCENIC loom file
scenic_loom_path <- file.path(tempdir(), "pyscenic_integrated-output.loom")
download.file("https://zenodo.org/records/10944066/files/pbmc3k_small_pyscenic_integrated-output.loom", scenic_loom_path, mode = "wb")

# Importing SCENIC Loom Files into Seurat
pbmc <- ImportPyscenicLoom(scenic_loom_path, seu = pbmc)

# Visualizing variables such as cluster, gene expression, and SCENIC regulon activity with customized colors
DimPlot2(
  pbmc,
  features = c("seurat_clusters", "orig.ident", "CEBPA", "tf_CEBPA"),
  cols = list("tf_CEBPA" = "OrRd"),
  theme = NoAxes()
) + theme_umap_arrows()

```
## Displaying three markers on a single UMAP
```{r, fig.height=6, fig.width=8}
library(Seurat)
library(SeuratExtend)

FeaturePlot3(pbmc, feature.1 = "CD3D", feature.2 = "CD7", feature.3 = "CD8A", pt.size = 1, dark.theme = TRUE)

FeaturePlot3(pbmc, feature.1 = "IL2RA", feature.2 = "CD69", feature.3 = "FOXP3", pt.size = 1, dark.theme = TRUE)

FeaturePlot3(pbmc, feature.1 = "LAG3", feature.2 = "PDCD1", feature.3 = "CTLA4", pt.size = 1, dark.theme = TRUE)

```


# 10. New ClusterDistrPlot Function
```{r, fig.height=6, fig.width=8}

pbmc$condition <- ifelse(
  pbmc$orig.ident %in% paste0("L", 1:7),    # Malignant samples L1 to L7
  "Malignant",
  ifelse(
    pbmc$orig.ident %in% c("CD4T_lab", "CD4T_10x"),  # Healthy samples
    "Healthy",
    "Other"  # for any other samples, if present
  )
)

# Convert to factor with Malignant first, Healthy second
pbmc$condition <- factor(pbmc$condition, levels = c("Malignant", "Healthy"))



# Compare cluster distribution between conditions
ClusterDistrPlot(
  origin = pbmc$orig.ident,
  cluster = pbmc$Prediction,
  condition = pbmc$condition
)

```


# 11. Enhanced Log Fold Change Options in Differential Analysis Plots
```{r, fig.height=6, fig.width=8}
# Using log base 2 for fold change calculations in WaterfallPlot
WaterfallPlot(
  pbmc, 
  group.by = "Prediction", 
  features = VariableFeatures(pbmc)[1:80],
  ident.1 = "CD4 Tn", 
  ident.2 = "CD4 Trm", 
  length = "logFC",
  log.base = "2",    # Use log2 instead of natural log
  top.n = 20)

```

## Save the RDS after changes
```{r}


saveRDS(All_samples_Merged, file = "STCAT_Annotation/All_samples_Merged_with_STCAT_and_cleaned.rds")



```
