1. load libraries

2. Load Seurat Object


#Load Seurat Object merged from cell lines and a control(PBMC) after filtration and Annotation
SS_All_samples_Merged <- load("SS_All_Sample_Merged_Azimuth_ProjectTils_singleR_ANNOTATION_on_My_UMAP0.7_HPC.Robj")

All_samples_Merged
An object of class Seurat 
63193 features across 49193 samples within 6 assays 
Active assay: SCT (26469 features, 3000 variable features)
 3 layers present: counts, data, scale.data
 5 other assays present: RNA, ADT, prediction.score.celltype.l1, prediction.score.celltype.l2, prediction.score.celltype.l3
 4 dimensional reductions calculated: pca, umap, integrated_dr, ref.umap

3. QC Visualization



VlnPlot(
  All_samples_Merged,
  features = c("nFeature_RNA",
               "nCount_RNA",
               "percent.mito"),
  ncol = 3
)


FeatureScatter(All_samples_Merged,
               feature1 = "nCount_RNA",
               feature2 = "nFeature_RNA") +
  geom_smooth(method = 'lm')

##FeatureScatter is typically used to visualize feature-feature relationships ##for anything calculated by the object, ##i.e. columns in object metadata, PC scores etc.


FeatureScatter(All_samples_Merged, 
               feature1 = "nCount_RNA", 
               feature2 = "percent.mito")+
  geom_smooth(method = 'lm')


FeatureScatter(All_samples_Merged, 
               feature1 = "nCount_RNA", 
               feature2 = "nFeature_RNA")+
  geom_smooth(method = 'lm')

4. Normalize data



# Apply SCTransform
#All_samples_Merged <- SCTransform(All_samples_Merged, verbose = TRUE)
                                      

5. Perform PCA


# Variables_genes <- All_samples_Merged@assays$SCT@var.features
# 
# # Exclude genes starting with "HLA-" or "Xist"
# Variables_genes_after_exclusion <- Variables_genes[!grepl("^HLA-|^Xist", Variables_genes)]
# 
# 
# # These are now standard steps in the Seurat workflow for visualization and clustering
# All_samples_Merged <- RunPCA(All_samples_Merged,
#                         features = Variables_genes_after_exclusion,
#                         do.print = TRUE, 
#                         pcs.print = 1:5, 
#                         genes.print = 15)

# determine dimensionality of the data
ElbowPlot(All_samples_Merged)

6. Clustering

# All_samples_Merged <- FindNeighbors(All_samples_Merged, 
#                                 dims = 1:8, 
#                                 verbose = FALSE)
# 
# # understanding resolution
# All_samples_Merged <- FindClusters(All_samples_Merged, 
#                                     resolution = 1.2)

# non-linear dimensionality reduction --------------
# All_samples_Merged <- RunUMAP(All_samples_Merged, 
#                           dims = 1:8,
#                           verbose = FALSE)
                                  

# note that you can set `label = TRUE` or use the LabelClusters function to help label
# individual clusters
UMAPPlot(All_samples_Merged,group.by = "cell_line", 
        reduction = "umap",
        label.size = 3,
        repel = T,
        label = T)


UMAPPlot(All_samples_Merged,
        group.by = "SCT_snn_res.0.7", 
        reduction = "umap",
        label.size = 3,
        repel = T,
        label = T)


cluster_table <- table(Idents(All_samples_Merged))

head(cluster_table)

   0    1    2    3    4    5 
5521 5259 5230 3959 3866 3649 

7. Azimuth Annotation

# InstallData("pbmcsca")
# 
# UpdateSeuratObject("pbmcsca")
# 
# # The RunAzimuth function can take a Seurat object as input
# All_samples_Merged <- RunAzimuth(All_samples_Merged, reference = "pbmcsca")

DimPlot(All_samples_Merged,
        pt.size = 1,
         group.by = "predicted.celltype.l1",
         label = FALSE,
         label.size = 4)


DimPlot(All_samples_Merged,
        pt.size = 1,
         group.by = "predicted.celltype.l2",
         label = FALSE,
         label.size = 4)


DimPlot(All_samples_Merged,
        pt.size = 1,
         group.by = "predicted.celltype.l3",
         label = FALSE,
         label.size = 4)

8. ProjectTils Annotation

#Load reference atlas and query data
# ref <- readRDS(file = "CD4T_human_ref_v1.rds")
# 
# #Run Projection algorithm
# query.projected <- Run.ProjecTILs(All_samples_Merged, ref = ref)



#reference atlas
# DimPlot(ref, label = T)

#Visualize projection
# plot.projection(ref, query.projected, linesize = 0.5, pointsize = 0.5)
# 
# #Plot the predicted composition of the query in terms of reference T cell subtypes
# plot.statepred.composition(ref, query.projected, metric = "Percent")


#All_samples_Merged <- ProjecTILs.classifier(query = All_samples_Merged, ref = ref)
UMAPPlot(All_samples_Merged, group.by = "functional.cluster", 
        reduction = "umap",
        label.size = 3,
        repel = T,
        label = F)

9.SingleR Annotation

#get reference datasets from celldex package

# monaco.ref <- celldex::MonacoImmuneData()
# hpca.ref <- celldex::HumanPrimaryCellAtlasData()
# dice.ref <- celldex::DatabaseImmuneCellExpressionData()
# bpe.ref <- celldex::BlueprintEncodeData()
# 
# #convert our Seurat object to single cell experiment (SCE)
# sce <- as.SingleCellExperiment(DietSeurat(All_samples_Merged))
# 
# #using SingleR
# monaco.main <- SingleR(test = sce,assay.type.test = 1,ref = monaco.ref,labels = monaco.ref$label.main)
# monaco.fine <- SingleR(test = sce,assay.type.test = 1,ref = monaco.ref,labels = monaco.ref$label.fine)
# hpca.main <- SingleR(test = sce,assay.type.test = 1,ref = hpca.ref,labels = hpca.ref$label.main)
# hpca.fine <- SingleR(test = sce,assay.type.test = 1,ref = hpca.ref,labels = hpca.ref$label.fine)
# dice.main <- SingleR(test = sce,assay.type.test = 1,ref = dice.ref,labels = dice.ref$label.main)
# dice.fine <- SingleR(test = sce,assay.type.test = 1,ref = dice.ref,labels = dice.ref$label.fine)
# bpe.main <- SingleR(test = sce,assay.type.test = 1,ref = bpe.ref,labels = bpe.ref$label.main)
# bpe.fine <- SingleR(test = sce,assay.type.test = 1,ref = bpe.ref,labels = bpe.ref$label.fine)
# 

#summary of general cell type annotations

#table(monaco.main$pruned.labels)
#table(hpca.main$pruned.labels)
#table(dice.main$pruned.labels)
#table(bpe.main$pruned.labels)

#The finer cell types annotations are you after, the harder they are to get reliably. 
#This is where comparing many databases, as well as using individual markers from literature, 
#would all be very valuable.

#table(monaco.fine$pruned.labels)
#table(hpca.fine$pruned.labels)
#table(dice.fine$pruned.labels)
#table(bpe.fine$pruned.labels)



#add the annotations to the Seurat object metadata
# All_samples_Merged@meta.data$monaco.main <- monaco.main$pruned.labels
# All_samples_Merged@meta.data$monaco.fine <- monaco.fine$pruned.labels
# #
# All_samples_Merged@meta.data$hpca.main   <- hpca.main$pruned.labels
# All_samples_Merged@meta.data$hpca.fine   <- hpca.fine$pruned.labels
# #  
# All_samples_Merged@meta.data$dice.main   <- dice.main$pruned.labels
# All_samples_Merged@meta.data$dice.fine   <- dice.fine$pruned.labels
# # 
# All_samples_Merged@meta.data$bpe.main   <- bpe.main$pruned.labels
# All_samples_Merged@meta.data$bpe.fine   <- bpe.fine$pruned.labels


All_samples_Merged <- SetIdent(All_samples_Merged, value = "hpca.main")
  DimPlot(All_samples_Merged, label = F , repel = T, label.size = 3) 

  DimPlot(All_samples_Merged, label = T , repel = T, label.size = 3) 



 All_samples_Merged <- SetIdent(All_samples_Merged, value = "hpca.fine")
  DimPlot(All_samples_Merged, label = F , repel = T, label.size = 3) 

  DimPlot(All_samples_Merged, label = F , repel = T, label.size = 3) 

  
  
  All_samples_Merged <- SetIdent(All_samples_Merged, value = "dice.main")
  DimPlot(All_samples_Merged, label = F , repel = T, label.size = 3) 

  DimPlot(All_samples_Merged, label = T , repel = T, label.size = 3) 



 All_samples_Merged <- SetIdent(All_samples_Merged, value = "dice.fine")
  DimPlot(All_samples_Merged, label = F , repel = T, label.size = 3) 

  DimPlot(All_samples_Merged, label = T , repel = T, label.size = 3) 

  
  All_samples_Merged <- SetIdent(All_samples_Merged, value = "bpe.main")
  DimPlot(All_samples_Merged, label = F , repel = T, label.size = 3) 

  DimPlot(All_samples_Merged, label = T , repel = T, label.size = 3) 



 All_samples_Merged <- SetIdent(All_samples_Merged, value = "bpe.fine")
  DimPlot(All_samples_Merged, label = F , repel = T, label.size = 3) 

  DimPlot(All_samples_Merged, label = T , repel = T, label.size = 3) 




All_samples_Merged <- SetIdent(All_samples_Merged, value = "monaco.main")
  DimPlot(All_samples_Merged, label = F , repel = T, label.size = 3) 

  DimPlot(All_samples_Merged, label = T , repel = T, label.size = 3) 



 All_samples_Merged <- SetIdent(All_samples_Merged, value = "monaco.fine")
  DimPlot(All_samples_Merged, label = F , repel = T, label.size = 3) 

  DimPlot(All_samples_Merged, label = T , repel = T, label.size = 3) 

NA
---
title: "UMAP without correction+Annotated"
author: Nasir Mahmood Abbasi
date: "2024-04-23"
output:
  html_notebook: 
    toc: true
    toc_float: true
    toc_collapsed: true
    theme: darkly
---

# 1. load libraries
```{r setup, include=FALSE}

library(Seurat)
library(SeuratObject)
library(SeuratData)
library(patchwork)
# 
# library(dplyr)
# library(tidyverse)
# library(ggplot2)
# library(RColorBrewer)
# library(magrittr)
# library(dbplyr)
# library(rmarkdown)
# library(knitr)
# library(tinytex)
# #Azimuth Annotation libraries
# library(Azimuth)
# #ProjecTils Annotation libraries
# library(STACAS)
# library(ProjecTILs)
# #singleR Annotation libraries
# library(SingleR)
# library(celldex)
# library(SingleCellExperiment)


```
# 2. Load Seurat Object 
```{r load_seurat}

#Load Seurat Object merged from cell lines and a control(PBMC) after filtration and Annotation
SS_All_samples_Merged <- load("SS_All_Sample_Merged_Azimuth_ProjectTils_singleR_ANNOTATION_on_My_UMAP0.7_HPC.Robj")

All_samples_Merged
```
# 3. QC Visualization
```{r QC, fig.height=6, fig.width=10}


VlnPlot(
  All_samples_Merged,
  features = c("nFeature_RNA",
               "nCount_RNA",
               "percent.mito"),
  ncol = 3
)

FeatureScatter(All_samples_Merged,
               feature1 = "nCount_RNA",
               feature2 = "nFeature_RNA") +
  geom_smooth(method = 'lm')

```

##FeatureScatter is typically used to visualize feature-feature relationships
##for anything calculated by the object, 
##i.e. columns in object metadata, PC scores etc.

```{r FC, fig.height=6, fig.width=10}

FeatureScatter(All_samples_Merged, 
               feature1 = "nCount_RNA", 
               feature2 = "percent.mito")+
  geom_smooth(method = 'lm')

FeatureScatter(All_samples_Merged, 
               feature1 = "nCount_RNA", 
               feature2 = "nFeature_RNA")+
  geom_smooth(method = 'lm')

```
# 4. Normalize data
```{r Normalize}


# Apply SCTransform
#All_samples_Merged <- SCTransform(All_samples_Merged, verbose = TRUE)
                                      
```
# 5. Perform PCA
```{r PCA, fig.height=6, fig.width=10}

# Variables_genes <- All_samples_Merged@assays$SCT@var.features
# 
# # Exclude genes starting with "HLA-" or "Xist"
# Variables_genes_after_exclusion <- Variables_genes[!grepl("^HLA-|^Xist", Variables_genes)]
# 
# 
# # These are now standard steps in the Seurat workflow for visualization and clustering
# All_samples_Merged <- RunPCA(All_samples_Merged,
#                         features = Variables_genes_after_exclusion,
#                         do.print = TRUE, 
#                         pcs.print = 1:5, 
#                         genes.print = 15)

# determine dimensionality of the data
ElbowPlot(All_samples_Merged)
```
# 6. Clustering
```{r C1, fig.height=6, fig.width=10}
# All_samples_Merged <- FindNeighbors(All_samples_Merged, 
#                                 dims = 1:8, 
#                                 verbose = FALSE)
# 
# # understanding resolution
# All_samples_Merged <- FindClusters(All_samples_Merged, 
#                                     resolution = 1.2)

```


```{r C2, fig.height=6, fig.width=10}

# non-linear dimensionality reduction --------------
# All_samples_Merged <- RunUMAP(All_samples_Merged, 
#                           dims = 1:8,
#                           verbose = FALSE)
                                  

# note that you can set `label = TRUE` or use the LabelClusters function to help label
# individual clusters
UMAPPlot(All_samples_Merged,group.by = "cell_line", 
        reduction = "umap",
        label.size = 3,
        repel = T,
        label = T)

UMAPPlot(All_samples_Merged,
        group.by = "SCT_snn_res.0.7", 
        reduction = "umap",
        label.size = 3,
        repel = T,
        label = T)

cluster_table <- table(Idents(All_samples_Merged))

head(cluster_table)
```
# 7. Azimuth Annotation
```{r azimuth_Annotation, fig.height=6, fig.width=10}
# InstallData("pbmcsca")
# 
# UpdateSeuratObject("pbmcsca")
# 
# # The RunAzimuth function can take a Seurat object as input
# All_samples_Merged <- RunAzimuth(All_samples_Merged, reference = "pbmcsca")

DimPlot(All_samples_Merged,
        pt.size = 1,
         group.by = "predicted.celltype.l1",
         label = FALSE,
         label.size = 4)

DimPlot(All_samples_Merged,
        pt.size = 1,
         group.by = "predicted.celltype.l2",
         label = FALSE,
         label.size = 4)

DimPlot(All_samples_Merged,
        pt.size = 1,
         group.by = "predicted.celltype.l3",
         label = FALSE,
         label.size = 4)
```
# 8. ProjectTils Annotation
```{r ProjecTils, fig.height=6, fig.width=10}
#Load reference atlas and query data
# ref <- readRDS(file = "CD4T_human_ref_v1.rds")
# 
# #Run Projection algorithm
# query.projected <- Run.ProjecTILs(All_samples_Merged, ref = ref)



#reference atlas
# DimPlot(ref, label = T)

#Visualize projection
# plot.projection(ref, query.projected, linesize = 0.5, pointsize = 0.5)
# 
# #Plot the predicted composition of the query in terms of reference T cell subtypes
# plot.statepred.composition(ref, query.projected, metric = "Percent")


#All_samples_Merged <- ProjecTILs.classifier(query = All_samples_Merged, ref = ref)
UMAPPlot(All_samples_Merged, group.by = "functional.cluster", 
        reduction = "umap",
        label.size = 3,
        repel = T,
        label = F)
```
# 9.SingleR Annotation
```{r singleR, fig.height=6, fig.width=10}
#get reference datasets from celldex package

# monaco.ref <- celldex::MonacoImmuneData()
# hpca.ref <- celldex::HumanPrimaryCellAtlasData()
# dice.ref <- celldex::DatabaseImmuneCellExpressionData()
# bpe.ref <- celldex::BlueprintEncodeData()
# 
# #convert our Seurat object to single cell experiment (SCE)
# sce <- as.SingleCellExperiment(DietSeurat(All_samples_Merged))
# 
# #using SingleR
# monaco.main <- SingleR(test = sce,assay.type.test = 1,ref = monaco.ref,labels = monaco.ref$label.main)
# monaco.fine <- SingleR(test = sce,assay.type.test = 1,ref = monaco.ref,labels = monaco.ref$label.fine)
# hpca.main <- SingleR(test = sce,assay.type.test = 1,ref = hpca.ref,labels = hpca.ref$label.main)
# hpca.fine <- SingleR(test = sce,assay.type.test = 1,ref = hpca.ref,labels = hpca.ref$label.fine)
# dice.main <- SingleR(test = sce,assay.type.test = 1,ref = dice.ref,labels = dice.ref$label.main)
# dice.fine <- SingleR(test = sce,assay.type.test = 1,ref = dice.ref,labels = dice.ref$label.fine)
# bpe.main <- SingleR(test = sce,assay.type.test = 1,ref = bpe.ref,labels = bpe.ref$label.main)
# bpe.fine <- SingleR(test = sce,assay.type.test = 1,ref = bpe.ref,labels = bpe.ref$label.fine)
# 

#summary of general cell type annotations

#table(monaco.main$pruned.labels)
#table(hpca.main$pruned.labels)
#table(dice.main$pruned.labels)
#table(bpe.main$pruned.labels)

#The finer cell types annotations are you after, the harder they are to get reliably. 
#This is where comparing many databases, as well as using individual markers from literature, 
#would all be very valuable.

#table(monaco.fine$pruned.labels)
#table(hpca.fine$pruned.labels)
#table(dice.fine$pruned.labels)
#table(bpe.fine$pruned.labels)



#add the annotations to the Seurat object metadata
# All_samples_Merged@meta.data$monaco.main <- monaco.main$pruned.labels
# All_samples_Merged@meta.data$monaco.fine <- monaco.fine$pruned.labels
# #
# All_samples_Merged@meta.data$hpca.main   <- hpca.main$pruned.labels
# All_samples_Merged@meta.data$hpca.fine   <- hpca.fine$pruned.labels
# #  
# All_samples_Merged@meta.data$dice.main   <- dice.main$pruned.labels
# All_samples_Merged@meta.data$dice.fine   <- dice.fine$pruned.labels
# # 
# All_samples_Merged@meta.data$bpe.main   <- bpe.main$pruned.labels
# All_samples_Merged@meta.data$bpe.fine   <- bpe.fine$pruned.labels


All_samples_Merged <- SetIdent(All_samples_Merged, value = "hpca.main")
  DimPlot(All_samples_Merged, label = F , repel = T, label.size = 3) 
  DimPlot(All_samples_Merged, label = T , repel = T, label.size = 3) 


 All_samples_Merged <- SetIdent(All_samples_Merged, value = "hpca.fine")
  DimPlot(All_samples_Merged, label = F , repel = T, label.size = 3) 
  DimPlot(All_samples_Merged, label = F , repel = T, label.size = 3) 
  
  
  All_samples_Merged <- SetIdent(All_samples_Merged, value = "dice.main")
  DimPlot(All_samples_Merged, label = F , repel = T, label.size = 3) 
  DimPlot(All_samples_Merged, label = T , repel = T, label.size = 3) 


 All_samples_Merged <- SetIdent(All_samples_Merged, value = "dice.fine")
  DimPlot(All_samples_Merged, label = F , repel = T, label.size = 3) 
  DimPlot(All_samples_Merged, label = T , repel = T, label.size = 3) 
  
  All_samples_Merged <- SetIdent(All_samples_Merged, value = "bpe.main")
  DimPlot(All_samples_Merged, label = F , repel = T, label.size = 3) 
  DimPlot(All_samples_Merged, label = T , repel = T, label.size = 3) 


 All_samples_Merged <- SetIdent(All_samples_Merged, value = "bpe.fine")
  DimPlot(All_samples_Merged, label = F , repel = T, label.size = 3) 
  DimPlot(All_samples_Merged, label = T , repel = T, label.size = 3) 



All_samples_Merged <- SetIdent(All_samples_Merged, value = "monaco.main")
  DimPlot(All_samples_Merged, label = F , repel = T, label.size = 3) 
  DimPlot(All_samples_Merged, label = T , repel = T, label.size = 3) 


 All_samples_Merged <- SetIdent(All_samples_Merged, value = "monaco.fine")
  DimPlot(All_samples_Merged, label = F , repel = T, label.size = 3) 
  DimPlot(All_samples_Merged, label = T , repel = T, label.size = 3) 
  
```
