Nova-seq_clustering_reduced
- SevenBridges Run -> BD Rhapsodyâ„¢ Targeted Analysis Pipeline run - 06-10-22 08:01:34
Flowchart
Workflow Flowchart.
##########################################################################
####################### Load relevant libraries ##########################
##########################################################################
library(dplyr)
library(tidyr)
library(deMULTIplex)
library(ggplot2)
library(stringr)
library(ShortRead)
library(Seurat)
library(SeuratDisk)
library(SeuratObject)
library(SingleCellExperiment)
library(scater)
library(scran)
library(tibble)# read in
counts_mat_tt3 <- read.csv(file = "C:/Users/Cathal/Desktop/Nova_seq/Nova_Seq_reduced/counts_mat_tt2.csv")
colnames(counts_mat_tt3) <- gsub("X", "", colnames(counts_mat_tt3))
rownames(counts_mat_tt3) <- counts_mat_tt3[,1]
counts_mat_tt3[,1] <- NULL
# as matrix
counts_matrix_reduced <- as.matrix(counts_mat_tt3)
## counts mat reduced, transposed for Seurat
head(counts_matrix_reduced)[1:6,1:10]## 748355 156129 468645 271758 533235 430102
## CCR7.CCR7.AHS0273.pAbO 168668 5281 4530 172 444 1986
## CD11c.B.LY6.ITGAX.AHS0056.pAbO 111 3678 3421 123 202 860
## CD127.IL7R.AHS0028.pAbO 57 3488 3525 74 135 3400
## CD134.ACT35.TNFRSF4.AHS0013.pAbO 164 7220 6553 268 296 1063
## CD137.TNFRSF9.AHS0003.pAbO 120 7677 6995 191 511 894
## CD14.MPHIP9.CD14.AHS0037.pAbO 27 2886 2798 65 103 322
## 229345 579081 243257 240147
## CCR7.CCR7.AHS0273.pAbO 3034 1612 1368 1700
## CD11c.B.LY6.ITGAX.AHS0056.pAbO 2843 693 1061 695
## CD127.IL7R.AHS0028.pAbO 1610 2961 2547 2302
## CD134.ACT35.TNFRSF4.AHS0013.pAbO 4099 702 933 969
## CD137.TNFRSF9.AHS0003.pAbO 4840 643 737 720
## CD14.MPHIP9.CD14.AHS0037.pAbO 1956 227 301 298
###
tail(counts_matrix_reduced)[1:6,1:10]## 748355 156129 468645 271758 533235 430102 229345 579081 243257 240147
## XBP1 0 0 0 0 0 1 4 0 4 0
## YBX3 0 0 0 0 0 0 0 0 8 0
## ZAP70 0 0 0 0 0 0 0 0 0 0
## ZBED2 0 0 0 0 0 0 8 0 0 0
## ZBTB16 0 0 0 0 0 0 0 0 0 0
## ZNF683 0 0 0 0 0 0 0 0 0 0
Construct Seurat / SCE object
##########################################################################
######################## Make SC obj #####################################
##########################################################################
# Seurat
seurat <- CreateSeuratObject(counts = counts_matrix_reduced)
###
## View
seurat## An object of class Seurat
## 427 features across 30388 samples within 1 assay
## Active assay: RNA (427 features, 0 variable features)
head(rownames(seurat))## [1] "CCR7.CCR7.AHS0273.pAbO" "CD11c.B.LY6.ITGAX.AHS0056.pAbO"
## [3] "CD127.IL7R.AHS0028.pAbO" "CD134.ACT35.TNFRSF4.AHS0013.pAbO"
## [5] "CD137.TNFRSF9.AHS0003.pAbO" "CD14.MPHIP9.CD14.AHS0037.pAbO"
## rownames should be `genes`
## object contains 30,388 which is the 90k run minus Undetermined and MultipletsAdd Metadata to Seurat object
# meta-data reduced
df_meta <- read.csv("C:/Users/Cathal/Desktop/Nova_seq/Nova_Seq_reduced/df_metadata_reduced.csv")[ ,c('Cell_Index', 'Sample_Tag', 'Sample_Name', 'Cell_Type_Experimental')]
head(df_meta)## Cell_Index Sample_Tag Sample_Name Cell_Type_Experimental
## 1 748355 SampleTag04_hs P3 Natural_killer
## 2 156129 SampleTag02_hs P1 T_CD4_naive
## 3 468645 SampleTag04_hs P3 B
## 4 271758 SampleTag06_hs P5 T_CD8_naive
## 5 533235 SampleTag06_hs P5 T_CD4_memory
## 6 430102 SampleTag06_hs P5 T_CD4_memory
# Add metadata to seurat object
seurat <- AddMetaData(object = seurat, metadata = df_meta)
seurat@meta.data$Cell_Type_Experimental <- as.character(df_meta$Cell_Type_Experimental)
seurat@meta.data$Sample_Name <- as.character(df_meta$Sample_Name)
seurat@meta.data$Sample_Tag <- as.character(df_meta$Sample_Tag)
seurat@meta.data$Cell_Index <- as.character(df_meta$Cell_Index)
#seurat@meta.data$Bar1 <- as.character(df_meta_left$Bar1)
#seurat@meta.data$Bar2 <- as.character(df_meta_left$Bar2)
head(seurat)## orig.ident nCount_RNA nFeature_RNA Cell_Index Sample_Tag
## 748355 SeuratProject 171089 42 748355 SampleTag04_hs
## 156129 SeuratProject 115962 51 156129 SampleTag02_hs
## 468645 SeuratProject 112126 47 468645 SampleTag04_hs
## 271758 SeuratProject 86133 42 271758 SampleTag06_hs
## 533235 SeuratProject 76258 53 533235 SampleTag06_hs
## 430102 SeuratProject 71297 48 430102 SampleTag06_hs
## 229345 SeuratProject 69049 161 229345 SampleTag06_hs
## 579081 SeuratProject 58032 49 579081 SampleTag06_hs
## 243257 SeuratProject 51675 118 243257 SampleTag03_hs
## 240147 SeuratProject 48356 50 240147 SampleTag06_hs
## Sample_Name Cell_Type_Experimental
## 748355 P3 Natural_killer
## 156129 P1 T_CD4_naive
## 468645 P3 B
## 271758 P5 T_CD8_naive
## 533235 P5 T_CD4_memory
## 430102 P5 T_CD4_memory
## 229345 P5 Monocyte_classical
## 579081 P5 T_CD4_memory
## 243257 P2 B
## 240147 P5 T_CD8_memory
Quality control metrics and plots - remove low quality cells if needed
##########################################################################
################################ QC ######################################
##########################################################################
# nFeature_RNA is the number of genes detected in each cell. nCount_RNA is the total number of molecules detected within a cell
# observe counts per gene in each row of the seurat object
counts_per_gene <- Matrix::rowSums(seurat)
# https://satijalab.org/seurat/articles/pbmc3k_tutorial.html
# nFeature_RNA is the number of genes detected in each cell. nCount_RNA is the total number of molecules detected within a cell
# The [[ operator can add columns to object metadata. This is a great place to stash QC stats
#seurat[["percent.mt"]] <- PercentageFeatureSet(seurat, pattern = "^MT-")
# Visualize QC metrics as a violin plot
VlnPlot(seurat, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2)plot <- FeatureScatter(seurat, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plotRun PCA and TSNE / UMAP and add cluster labels to metadata slot
##########################################################################
####################### Seurat Clustering ################################
##########################################################################
########### Set features as Abseqs for Clustering
Abseqs <- rownames(x = seurat[1:30])
Abseqs## [1] "CCR7.CCR7.AHS0273.pAbO" "CD11c.B.LY6.ITGAX.AHS0056.pAbO"
## [3] "CD127.IL7R.AHS0028.pAbO" "CD134.ACT35.TNFRSF4.AHS0013.pAbO"
## [5] "CD137.TNFRSF9.AHS0003.pAbO" "CD14.MPHIP9.CD14.AHS0037.pAbO"
## [7] "CD161.HP.3G10.KLRB1.AHS0205.pAbO" "CD16.3G8.FCGR3A.AHS0053.pAbO"
## [9] "CD183.CXCR3.AHS0031.pAbO" "CD196.CCR6.AHS0034.pAbO"
## [11] "CD19.SJ25C1.CD19.AHS0030.pAbO" "CD25.2A3.IL2RA.AHS0026.pAbO"
## [13] "CD272.BTLA.AHS0052.pAbO" "CD278.ICOS.AHS0012.pAbO"
## [15] "CD279.EH12.1.PDCD1.AHS0014.pAbO" "CD27.M.T271.CD27.AHS0025.pAbO"
## [17] "CD28.L293.CD28.AHS0138.pAbO" "CD3.UCHT1.CD3E.AHS0231.pAbO"
## [19] "CD45RA.HI100.PTPRC.AHS0009.pAbO" "CD4.SK3.CD4.AHS0032.pAbO"
## [21] "CD56.NCAM16.2.NCAM1.AHS0019.pAbO" "CD62L.DREG.56.SELL.AHS0049.pAbO"
## [23] "CD8.SK1.CD8A.AHS0228.pAbO" "CXCR5.CXCR5.AHS0039.pAbO"
## [25] "CXCR6.CXCR6.AHS0148.pAbO" "GITR.TNFRSF18.AHS0104.pAbO"
## [27] "HLA.DR.CD74.AHS0035.pAbO" "IgD.IGHD.AHS0058.pAbO"
## [29] "IgM.IGHM.AHS0198.pAbO" "Tim3.HAVCR2.AHS0016.pAbO"
#all_genes <- rownames(seurat)
seurat <- ScaleData(seurat)
##seurat <- RunPCA(seurat, npcs = 20, features = Abseqs)
#ElbowPlot(seurat)
### chose 8 based on Elbow plot
seurat <- RunPCA(seurat, npcs = 8, features = Abseqs)
seurat <- FindNeighbors(seurat, dims = 1:8)
seurat <- RunTSNE(seurat, features = Abseqs, check_duplicates = FALSE, dims = 1:8)
seurat <- RunUMAP(seurat, dims = 1:8)
seurat <- FindClusters(seurat)## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 30388
## Number of edges: 907949
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8910
## Number of communities: 21
## Elapsed time: 12 seconds
## examine clustering results
print(seurat[["pca"]], dims = 1:5, nfeatures = 5)## PC_ 1
## Positive: CD183.CXCR3.AHS0031.pAbO, CD161.HP.3G10.KLRB1.AHS0205.pAbO, CD127.IL7R.AHS0028.pAbO, CXCR6.CXCR6.AHS0148.pAbO, CD272.BTLA.AHS0052.pAbO
## Negative: CCR7.CCR7.AHS0273.pAbO, IgD.IGHD.AHS0058.pAbO, CD56.NCAM16.2.NCAM1.AHS0019.pAbO, CD4.SK3.CD4.AHS0032.pAbO, CD45RA.HI100.PTPRC.AHS0009.pAbO
## PC_ 2
## Positive: CD196.CCR6.AHS0034.pAbO, Tim3.HAVCR2.AHS0016.pAbO, IgM.IGHM.AHS0198.pAbO, CD278.ICOS.AHS0012.pAbO, CD127.IL7R.AHS0028.pAbO
## Negative: CD4.SK3.CD4.AHS0032.pAbO, CD27.M.T271.CD27.AHS0025.pAbO, CD3.UCHT1.CD3E.AHS0231.pAbO, CD62L.DREG.56.SELL.AHS0049.pAbO, CD8.SK1.CD8A.AHS0228.pAbO
## PC_ 3
## Positive: CXCR5.CXCR5.AHS0039.pAbO, CD19.SJ25C1.CD19.AHS0030.pAbO, HLA.DR.CD74.AHS0035.pAbO, CD45RA.HI100.PTPRC.AHS0009.pAbO, CD27.M.T271.CD27.AHS0025.pAbO
## Negative: CD134.ACT35.TNFRSF4.AHS0013.pAbO, CD3.UCHT1.CD3E.AHS0231.pAbO, CD137.TNFRSF9.AHS0003.pAbO, CD28.L293.CD28.AHS0138.pAbO, CD161.HP.3G10.KLRB1.AHS0205.pAbO
## PC_ 4
## Positive: CD4.SK3.CD4.AHS0032.pAbO, CD62L.DREG.56.SELL.AHS0049.pAbO, CD196.CCR6.AHS0034.pAbO, Tim3.HAVCR2.AHS0016.pAbO, CD278.ICOS.AHS0012.pAbO
## Negative: CD134.ACT35.TNFRSF4.AHS0013.pAbO, CD11c.B.LY6.ITGAX.AHS0056.pAbO, CD137.TNFRSF9.AHS0003.pAbO, CD14.MPHIP9.CD14.AHS0037.pAbO, CD8.SK1.CD8A.AHS0228.pAbO
## PC_ 5
## Positive: CD8.SK1.CD8A.AHS0228.pAbO, CD45RA.HI100.PTPRC.AHS0009.pAbO, CD3.UCHT1.CD3E.AHS0231.pAbO, CD27.M.T271.CD27.AHS0025.pAbO, CD19.SJ25C1.CD19.AHS0030.pAbO
## Negative: CD11c.B.LY6.ITGAX.AHS0056.pAbO, CD14.MPHIP9.CD14.AHS0037.pAbO, CD4.SK3.CD4.AHS0032.pAbO, HLA.DR.CD74.AHS0035.pAbO, CD62L.DREG.56.SELL.AHS0049.pAbO
VizDimLoadings(seurat, dims = 1, reduction = "pca")VizDimLoadings(seurat, dims = 2, reduction = "pca")seurat## An object of class Seurat
## 427 features across 30388 samples within 1 assay
## Active assay: RNA (427 features, 0 variable features)
## 3 dimensional reductions calculated: pca, tsne, umap
seurat[["pca"]]## A dimensional reduction object with key PC_
## Number of dimensions: 8
## Projected dimensional reduction calculated: FALSE
## Jackstraw run: FALSE
## Computed using assay: RNA
seurat[["tsne"]]## A dimensional reduction object with key tSNE_
## Number of dimensions: 2
## Projected dimensional reduction calculated: FALSE
## Jackstraw run: FALSE
## Computed using assay: RNA
head(Loadings(seurat, reduction = "pca")[])## PC_1 PC_2 PC_3
## CCR7.CCR7.AHS0273.pAbO 0.03099829 -0.005097648 -0.001345991
## CD11c.B.LY6.ITGAX.AHS0056.pAbO 0.18309285 -0.051342625 -0.054464568
## CD127.IL7R.AHS0028.pAbO 0.24340494 0.131382192 -0.058819374
## CD134.ACT35.TNFRSF4.AHS0013.pAbO 0.19601403 -0.096718847 -0.115979664
## CD137.TNFRSF9.AHS0003.pAbO 0.18792889 -0.059036290 -0.103996714
## CD14.MPHIP9.CD14.AHS0037.pAbO 0.13468098 -0.032190753 -0.057956569
## PC_4 PC_5 PC_6
## CCR7.CCR7.AHS0273.pAbO -0.02062605 0.00418239 -0.02271642
## CD11c.B.LY6.ITGAX.AHS0056.pAbO -0.36108199 -0.34731127 0.36362665
## CD127.IL7R.AHS0028.pAbO 0.14392477 0.02342396 0.02264879
## CD134.ACT35.TNFRSF4.AHS0013.pAbO -0.38469260 0.03484881 -0.32688354
## CD137.TNFRSF9.AHS0003.pAbO -0.30654957 0.01958164 -0.24383262
## CD14.MPHIP9.CD14.AHS0037.pAbO -0.30363725 -0.31265842 0.29978072
## PC_7 PC_8
## CCR7.CCR7.AHS0273.pAbO 0.009808829 -0.0000389384
## CD11c.B.LY6.ITGAX.AHS0056.pAbO -0.035740767 -0.0662437676
## CD127.IL7R.AHS0028.pAbO 0.021734495 0.0213257202
## CD134.ACT35.TNFRSF4.AHS0013.pAbO -0.003149277 -0.0251046771
## CD137.TNFRSF9.AHS0003.pAbO -0.003837044 -0.0131323868
## CD14.MPHIP9.CD14.AHS0037.pAbO 0.038330066 -0.4362019790
DimPlot(seurat, reduction = 'tsne')Dim reduction plots
DimPlot(seurat, reduction = 'tsne')# Use SevenBridges Immune Classification file as clusters
seurat <- SetIdent(seurat, value = 'Cell_Type_Experimental')
DimPlot(seurat, reduction = 'tsne')DimPlot(seurat, reduction = 'umap')Dotplots
##########################################################################
############################ Dotplots ####################################
##########################################################################
########## RNA Dotplot
rna_list <- c('CD3E',
'CD4',
'CD8A',
'MS4A1',
'CD14',
'FCGR3A',
'ITGAM',
'CD5',
'FCER2',
'IL2RA',
'CD69',
'CD22',
'CD79B')
DotPlot(seurat, features = rna_list, group.by = 'Cell_Type_Experimental') + RotatedAxis()######## Abseq dotplot
# Use same Idents from above on y-axis
DotPlot(seurat, features = Abseqs, group.by = 'Cell_Type_Experimental') + RotatedAxis()