Nova-seq_clustering_reduced

Flowchart

Workflow Flowchart.

##########################################################################
####################### Load relevant libraries ##########################
##########################################################################

library(dplyr)
library(tidyr)
library(deMULTIplex)
library(ggplot2)
library(stringr)
library(ShortRead)
library(Seurat)
library(SeuratDisk)
library(SeuratObject)
library(SingleCellExperiment)
library(scater)
library(scran)
library(tibble)
# read in
counts_mat_tt3 <- read.csv(file = "C:/Users/Cathal/Desktop/Nova_seq/Nova_Seq_reduced/counts_mat_tt2.csv")

colnames(counts_mat_tt3) <- gsub("X", "", colnames(counts_mat_tt3))

rownames(counts_mat_tt3) <- counts_mat_tt3[,1]

counts_mat_tt3[,1] <- NULL

# as matrix
counts_matrix_reduced <- as.matrix(counts_mat_tt3)

## counts mat reduced, transposed for Seurat

head(counts_matrix_reduced)[1:6,1:10]
##                                  748355 156129 468645 271758 533235 430102
## CCR7.CCR7.AHS0273.pAbO           168668   5281   4530    172    444   1986
## CD11c.B.LY6.ITGAX.AHS0056.pAbO      111   3678   3421    123    202    860
## CD127.IL7R.AHS0028.pAbO              57   3488   3525     74    135   3400
## CD134.ACT35.TNFRSF4.AHS0013.pAbO    164   7220   6553    268    296   1063
## CD137.TNFRSF9.AHS0003.pAbO          120   7677   6995    191    511    894
## CD14.MPHIP9.CD14.AHS0037.pAbO        27   2886   2798     65    103    322
##                                  229345 579081 243257 240147
## CCR7.CCR7.AHS0273.pAbO             3034   1612   1368   1700
## CD11c.B.LY6.ITGAX.AHS0056.pAbO     2843    693   1061    695
## CD127.IL7R.AHS0028.pAbO            1610   2961   2547   2302
## CD134.ACT35.TNFRSF4.AHS0013.pAbO   4099    702    933    969
## CD137.TNFRSF9.AHS0003.pAbO         4840    643    737    720
## CD14.MPHIP9.CD14.AHS0037.pAbO      1956    227    301    298
###

tail(counts_matrix_reduced)[1:6,1:10]
##        748355 156129 468645 271758 533235 430102 229345 579081 243257 240147
## XBP1        0      0      0      0      0      1      4      0      4      0
## YBX3        0      0      0      0      0      0      0      0      8      0
## ZAP70       0      0      0      0      0      0      0      0      0      0
## ZBED2       0      0      0      0      0      0      8      0      0      0
## ZBTB16      0      0      0      0      0      0      0      0      0      0
## ZNF683      0      0      0      0      0      0      0      0      0      0

Construct Seurat / SCE object

##########################################################################
######################## Make SC obj #####################################
##########################################################################


# Seurat
seurat <- CreateSeuratObject(counts = counts_matrix_reduced)


###



## View

seurat
## An object of class Seurat 
## 427 features across 30388 samples within 1 assay 
## Active assay: RNA (427 features, 0 variable features)
head(rownames(seurat))
## [1] "CCR7.CCR7.AHS0273.pAbO"           "CD11c.B.LY6.ITGAX.AHS0056.pAbO"  
## [3] "CD127.IL7R.AHS0028.pAbO"          "CD134.ACT35.TNFRSF4.AHS0013.pAbO"
## [5] "CD137.TNFRSF9.AHS0003.pAbO"       "CD14.MPHIP9.CD14.AHS0037.pAbO"
## rownames should be `genes`

## object contains 30,388 which is the 90k run minus Undetermined and Multiplets

Add Metadata to Seurat object

# meta-data reduced

df_meta <- read.csv("C:/Users/Cathal/Desktop/Nova_seq/Nova_Seq_reduced/df_metadata_reduced.csv")[ ,c('Cell_Index', 'Sample_Tag', 'Sample_Name', 'Cell_Type_Experimental')]

head(df_meta)
##   Cell_Index     Sample_Tag Sample_Name Cell_Type_Experimental
## 1     748355 SampleTag04_hs          P3         Natural_killer
## 2     156129 SampleTag02_hs          P1            T_CD4_naive
## 3     468645 SampleTag04_hs          P3                      B
## 4     271758 SampleTag06_hs          P5            T_CD8_naive
## 5     533235 SampleTag06_hs          P5           T_CD4_memory
## 6     430102 SampleTag06_hs          P5           T_CD4_memory
# Add metadata to seurat object
seurat <- AddMetaData(object = seurat, metadata = df_meta)


seurat@meta.data$Cell_Type_Experimental <- as.character(df_meta$Cell_Type_Experimental)
seurat@meta.data$Sample_Name <- as.character(df_meta$Sample_Name)
seurat@meta.data$Sample_Tag <- as.character(df_meta$Sample_Tag)
seurat@meta.data$Cell_Index <- as.character(df_meta$Cell_Index)
#seurat@meta.data$Bar1 <- as.character(df_meta_left$Bar1)
#seurat@meta.data$Bar2 <- as.character(df_meta_left$Bar2)

head(seurat)
##           orig.ident nCount_RNA nFeature_RNA Cell_Index     Sample_Tag
## 748355 SeuratProject     171089           42     748355 SampleTag04_hs
## 156129 SeuratProject     115962           51     156129 SampleTag02_hs
## 468645 SeuratProject     112126           47     468645 SampleTag04_hs
## 271758 SeuratProject      86133           42     271758 SampleTag06_hs
## 533235 SeuratProject      76258           53     533235 SampleTag06_hs
## 430102 SeuratProject      71297           48     430102 SampleTag06_hs
## 229345 SeuratProject      69049          161     229345 SampleTag06_hs
## 579081 SeuratProject      58032           49     579081 SampleTag06_hs
## 243257 SeuratProject      51675          118     243257 SampleTag03_hs
## 240147 SeuratProject      48356           50     240147 SampleTag06_hs
##        Sample_Name Cell_Type_Experimental
## 748355          P3         Natural_killer
## 156129          P1            T_CD4_naive
## 468645          P3                      B
## 271758          P5            T_CD8_naive
## 533235          P5           T_CD4_memory
## 430102          P5           T_CD4_memory
## 229345          P5     Monocyte_classical
## 579081          P5           T_CD4_memory
## 243257          P2                      B
## 240147          P5           T_CD8_memory

Quality control metrics and plots - remove low quality cells if needed

##########################################################################
################################ QC ######################################
##########################################################################


# nFeature_RNA is the number of genes detected in each cell. nCount_RNA is the total number of molecules detected within a cell

# observe counts per gene in each row of the seurat object
counts_per_gene <- Matrix::rowSums(seurat)


# https://satijalab.org/seurat/articles/pbmc3k_tutorial.html


# nFeature_RNA is the number of genes detected in each cell. nCount_RNA is the total number of molecules detected within a cell


# The [[ operator can add columns to object metadata. This is a great place to stash QC stats
#seurat[["percent.mt"]] <- PercentageFeatureSet(seurat, pattern = "^MT-")


# Visualize QC metrics as a violin plot
VlnPlot(seurat, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2)

plot <- FeatureScatter(seurat, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot

Run PCA and TSNE / UMAP and add cluster labels to metadata slot

##########################################################################
####################### Seurat Clustering ################################
##########################################################################



########### Set features as Abseqs for Clustering

Abseqs <- rownames(x = seurat[1:30])
Abseqs
##  [1] "CCR7.CCR7.AHS0273.pAbO"           "CD11c.B.LY6.ITGAX.AHS0056.pAbO"  
##  [3] "CD127.IL7R.AHS0028.pAbO"          "CD134.ACT35.TNFRSF4.AHS0013.pAbO"
##  [5] "CD137.TNFRSF9.AHS0003.pAbO"       "CD14.MPHIP9.CD14.AHS0037.pAbO"   
##  [7] "CD161.HP.3G10.KLRB1.AHS0205.pAbO" "CD16.3G8.FCGR3A.AHS0053.pAbO"    
##  [9] "CD183.CXCR3.AHS0031.pAbO"         "CD196.CCR6.AHS0034.pAbO"         
## [11] "CD19.SJ25C1.CD19.AHS0030.pAbO"    "CD25.2A3.IL2RA.AHS0026.pAbO"     
## [13] "CD272.BTLA.AHS0052.pAbO"          "CD278.ICOS.AHS0012.pAbO"         
## [15] "CD279.EH12.1.PDCD1.AHS0014.pAbO"  "CD27.M.T271.CD27.AHS0025.pAbO"   
## [17] "CD28.L293.CD28.AHS0138.pAbO"      "CD3.UCHT1.CD3E.AHS0231.pAbO"     
## [19] "CD45RA.HI100.PTPRC.AHS0009.pAbO"  "CD4.SK3.CD4.AHS0032.pAbO"        
## [21] "CD56.NCAM16.2.NCAM1.AHS0019.pAbO" "CD62L.DREG.56.SELL.AHS0049.pAbO" 
## [23] "CD8.SK1.CD8A.AHS0228.pAbO"        "CXCR5.CXCR5.AHS0039.pAbO"        
## [25] "CXCR6.CXCR6.AHS0148.pAbO"         "GITR.TNFRSF18.AHS0104.pAbO"      
## [27] "HLA.DR.CD74.AHS0035.pAbO"         "IgD.IGHD.AHS0058.pAbO"           
## [29] "IgM.IGHM.AHS0198.pAbO"            "Tim3.HAVCR2.AHS0016.pAbO"
#all_genes <- rownames(seurat)


seurat <- ScaleData(seurat)



##seurat <- RunPCA(seurat, npcs = 20, features = Abseqs)

#ElbowPlot(seurat)

### chose 8 based on Elbow plot

seurat <- RunPCA(seurat, npcs = 8, features = Abseqs)

seurat <- FindNeighbors(seurat, dims = 1:8)

seurat <- RunTSNE(seurat, features = Abseqs, check_duplicates = FALSE, dims = 1:8)

seurat <- RunUMAP(seurat, dims = 1:8)

seurat <- FindClusters(seurat)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 30388
## Number of edges: 907949
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8910
## Number of communities: 21
## Elapsed time: 12 seconds
## examine clustering results

print(seurat[["pca"]], dims = 1:5, nfeatures = 5)
## PC_ 1 
## Positive:  CD183.CXCR3.AHS0031.pAbO, CD161.HP.3G10.KLRB1.AHS0205.pAbO, CD127.IL7R.AHS0028.pAbO, CXCR6.CXCR6.AHS0148.pAbO, CD272.BTLA.AHS0052.pAbO 
## Negative:  CCR7.CCR7.AHS0273.pAbO, IgD.IGHD.AHS0058.pAbO, CD56.NCAM16.2.NCAM1.AHS0019.pAbO, CD4.SK3.CD4.AHS0032.pAbO, CD45RA.HI100.PTPRC.AHS0009.pAbO 
## PC_ 2 
## Positive:  CD196.CCR6.AHS0034.pAbO, Tim3.HAVCR2.AHS0016.pAbO, IgM.IGHM.AHS0198.pAbO, CD278.ICOS.AHS0012.pAbO, CD127.IL7R.AHS0028.pAbO 
## Negative:  CD4.SK3.CD4.AHS0032.pAbO, CD27.M.T271.CD27.AHS0025.pAbO, CD3.UCHT1.CD3E.AHS0231.pAbO, CD62L.DREG.56.SELL.AHS0049.pAbO, CD8.SK1.CD8A.AHS0228.pAbO 
## PC_ 3 
## Positive:  CXCR5.CXCR5.AHS0039.pAbO, CD19.SJ25C1.CD19.AHS0030.pAbO, HLA.DR.CD74.AHS0035.pAbO, CD45RA.HI100.PTPRC.AHS0009.pAbO, CD27.M.T271.CD27.AHS0025.pAbO 
## Negative:  CD134.ACT35.TNFRSF4.AHS0013.pAbO, CD3.UCHT1.CD3E.AHS0231.pAbO, CD137.TNFRSF9.AHS0003.pAbO, CD28.L293.CD28.AHS0138.pAbO, CD161.HP.3G10.KLRB1.AHS0205.pAbO 
## PC_ 4 
## Positive:  CD4.SK3.CD4.AHS0032.pAbO, CD62L.DREG.56.SELL.AHS0049.pAbO, CD196.CCR6.AHS0034.pAbO, Tim3.HAVCR2.AHS0016.pAbO, CD278.ICOS.AHS0012.pAbO 
## Negative:  CD134.ACT35.TNFRSF4.AHS0013.pAbO, CD11c.B.LY6.ITGAX.AHS0056.pAbO, CD137.TNFRSF9.AHS0003.pAbO, CD14.MPHIP9.CD14.AHS0037.pAbO, CD8.SK1.CD8A.AHS0228.pAbO 
## PC_ 5 
## Positive:  CD8.SK1.CD8A.AHS0228.pAbO, CD45RA.HI100.PTPRC.AHS0009.pAbO, CD3.UCHT1.CD3E.AHS0231.pAbO, CD27.M.T271.CD27.AHS0025.pAbO, CD19.SJ25C1.CD19.AHS0030.pAbO 
## Negative:  CD11c.B.LY6.ITGAX.AHS0056.pAbO, CD14.MPHIP9.CD14.AHS0037.pAbO, CD4.SK3.CD4.AHS0032.pAbO, HLA.DR.CD74.AHS0035.pAbO, CD62L.DREG.56.SELL.AHS0049.pAbO
VizDimLoadings(seurat, dims = 1, reduction = "pca")

VizDimLoadings(seurat, dims = 2, reduction = "pca")

seurat
## An object of class Seurat 
## 427 features across 30388 samples within 1 assay 
## Active assay: RNA (427 features, 0 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
seurat[["pca"]]
## A dimensional reduction object with key PC_ 
##  Number of dimensions: 8 
##  Projected dimensional reduction calculated:  FALSE 
##  Jackstraw run: FALSE 
##  Computed using assay: RNA
seurat[["tsne"]]
## A dimensional reduction object with key tSNE_ 
##  Number of dimensions: 2 
##  Projected dimensional reduction calculated:  FALSE 
##  Jackstraw run: FALSE 
##  Computed using assay: RNA
head(Loadings(seurat, reduction = "pca")[])
##                                        PC_1         PC_2         PC_3
## CCR7.CCR7.AHS0273.pAbO           0.03099829 -0.005097648 -0.001345991
## CD11c.B.LY6.ITGAX.AHS0056.pAbO   0.18309285 -0.051342625 -0.054464568
## CD127.IL7R.AHS0028.pAbO          0.24340494  0.131382192 -0.058819374
## CD134.ACT35.TNFRSF4.AHS0013.pAbO 0.19601403 -0.096718847 -0.115979664
## CD137.TNFRSF9.AHS0003.pAbO       0.18792889 -0.059036290 -0.103996714
## CD14.MPHIP9.CD14.AHS0037.pAbO    0.13468098 -0.032190753 -0.057956569
##                                         PC_4        PC_5        PC_6
## CCR7.CCR7.AHS0273.pAbO           -0.02062605  0.00418239 -0.02271642
## CD11c.B.LY6.ITGAX.AHS0056.pAbO   -0.36108199 -0.34731127  0.36362665
## CD127.IL7R.AHS0028.pAbO           0.14392477  0.02342396  0.02264879
## CD134.ACT35.TNFRSF4.AHS0013.pAbO -0.38469260  0.03484881 -0.32688354
## CD137.TNFRSF9.AHS0003.pAbO       -0.30654957  0.01958164 -0.24383262
## CD14.MPHIP9.CD14.AHS0037.pAbO    -0.30363725 -0.31265842  0.29978072
##                                          PC_7          PC_8
## CCR7.CCR7.AHS0273.pAbO            0.009808829 -0.0000389384
## CD11c.B.LY6.ITGAX.AHS0056.pAbO   -0.035740767 -0.0662437676
## CD127.IL7R.AHS0028.pAbO           0.021734495  0.0213257202
## CD134.ACT35.TNFRSF4.AHS0013.pAbO -0.003149277 -0.0251046771
## CD137.TNFRSF9.AHS0003.pAbO       -0.003837044 -0.0131323868
## CD14.MPHIP9.CD14.AHS0037.pAbO     0.038330066 -0.4362019790
DimPlot(seurat, reduction = 'tsne')

Dim reduction plots

DimPlot(seurat, reduction = 'tsne')

# Use SevenBridges Immune Classification file as clusters

seurat <- SetIdent(seurat, value = 'Cell_Type_Experimental')

DimPlot(seurat, reduction = 'tsne')

DimPlot(seurat, reduction = 'umap')

Dotplots

##########################################################################
############################ Dotplots ####################################
##########################################################################

########## RNA Dotplot


rna_list <- c('CD3E',
              'CD4',
              'CD8A',
              'MS4A1',
              'CD14',
              'FCGR3A',
              'ITGAM',
              'CD5',
              'FCER2',
              'IL2RA',
              'CD69',
              'CD22',
              'CD79B')


DotPlot(seurat, features = rna_list, group.by = 'Cell_Type_Experimental') + RotatedAxis()

######## Abseq dotplot


# Use same Idents from above on y-axis

DotPlot(seurat, features = Abseqs, group.by = 'Cell_Type_Experimental') + RotatedAxis()