Nova-seq

SevenBridges Run -> BD Rhapsody™ Targeted Analysis Pipeline run - 06-10-22 08:01:34

Flowchart

Workflow Flowchart.

##########################################################################
####################### Load relevant libraries ##########################
##########################################################################

library(dplyr)
library(tidyr)
library(deMULTIplex)
library(ggplot2)
library(stringr)
library(ShortRead)
library(Seurat)
library(SeuratDisk)
library(SeuratObject)
library(SingleCellExperiment)
library(scater)
library(scran)
library(tibble)

Read in SevenBridges datasets

Read in count matrix from SevenBridges OR Read in an already transposed data-frame. Then format data to construct a Seurat or SCE (Single Cell Experiment) object. Most single-cell algorithms and packages expect matrices to be in genes x cells format (genes as rownames , cells as colnames) and SevenBridges outputs the data tables in cell x genes format.
Read in Immune Classification file and SMK Calls file from SevenBridges run

##########################################################################
########################## SB data-frames ################################
##########################################################################

## read in files

# counts
df_sb <- read.csv("C:/Users/Cathal/Downloads/Combined_Nova-merged-LMO-filtered-OUT-Abseq-cell-calling-TRUE_RSEC_MolsPerCell.csv")


##1 - transpose in R and construct single-cell object
# transpose existing df
#counts_matrix_transposed_existing <- t(counts_matrix)


##2 - transpose matrix in Excel and read in
# write out
#write.csv(counts_matrix, file = "C:/Users/Cathal/Desktop/counts_mtx_untransposed.csv", row.names = F)

head(df_sb)[1:6,1:5]

##   Cell_Index CCR7.CCR7.AHS0273.pAbO CD11c.B.LY6.ITGAX.AHS0056.pAbO
## 1     748355                 168668                            111
## 2     255398                    313                            134
## 3     156129                   5281                           3678
## 4     703830                   4869                           3730
## 5     468645                   4530                           3421
## 6     240985                    231                             93
##   CD127.IL7R.AHS0028.pAbO CD134.ACT35.TNFRSF4.AHS0013.pAbO
## 1                      57                              164
## 2                      62                              168
## 3                    3488                             7220
## 4                    3133                             7577
## 5                    3525                             6553
## 6                     115                              241

## - 2 read in the already transposed data-frame

#colnames(counts_matrix_new_tt) <- gsub("X", "", colnames(counts_matrix_new_tt))

# rownames(counts_matrix_new_tt) <- counts_matrix_new_tt$Cell_Index
# 
# counts_matrix_new_tt$Cell_Index <- NULL
# 
# counts_matrix_sc <- as.matrix(counts_matrix_new_tt)

####


## Nova-seq

## Immune class
df_Immune_class_Nova <- read.csv(file = "C:/Users/Cathal/Desktop/Nova_seq/Nova-cell_type_90k_experimental.csv")


## Calls file
df_smk_calls_Nova <- read.csv(file = "C:/Users/Cathal/Desktop/Nova_seq/Nova-90k_Sample_Tag_Calls.csv")

## merge them together
df_sev_bri_meta <- full_join(df_smk_calls_Nova, df_Immune_class_Nova, by = "Cell_Index")

head(df_sev_bri_meta)

##   Cell_Index     Sample_Tag  Sample_Name Cell_Type_Experimental
## 1     748355 SampleTag04_hs           P3         Natural_killer
## 2     255398   Undetermined Undetermined           T_CD4_memory
## 3     156129 SampleTag02_hs           P1            T_CD4_naive
## 4     703830      Multiplet    Multiplet                      B
## 5     468645 SampleTag04_hs           P3                      B
## 6     240985   Undetermined Undetermined           T_CD8_memory

# counts_mat <- read.csv(file = "C:/Users/Cathal/Desktop/Nova_seq/sev_bri_90k_tt_onlin.csv")
# 
# rownames(counts_mat) <- counts_mat$Cell_Index
# 
# counts_mat$Cell_Index <- NULL
# 
# counts_mat <- as.matrix(counts_mat)

deMULTIplex workflow - Nova-seq Parsed FASTQs

# 1. Read the full FASTQs into R using ShortRead (command:


## R1
r1 <- readFastq("C:/Users/Cathal/Downloads/Nova_merged_R1_filtered.fastq.gz") #)
#and converted to dataframe containing the first 76 bases (command:
readTable_r1 <- as.data.frame(subseq(sread(r1), 1, 60))


## R2
r2 <- readFastq("C:/Users/Cathal/Downloads/Nova_merged_R1_filtered.fastq.gz") #)
#and converted to dataframe containing the first 76 bases (command:
readTable_r2 <- as.data.frame(subseq(sread(r2), 1, 42))

## R1
###
#2. Parsed the R1 dataframe to include only 300K total reads to minimize compute and split into CellID and UMI sections. Command:
# readTable_r1_2 <- as.data.frame(matrix(0L, nrow=1e3, ncol=2))
# colnames(readTable_r1_2) <- c("cell","umi")
# 
# #readTable_r1_2 <- as.character(readTable_r1_2)
# 
# for (i in 1:1e3) {
#   temp <- unlist(strsplit(readTable_r1$x[i], split=""))
#   readTable_r1_2$cell[i] <- paste(temp[c(1:9,22:30,44:52)], collapse='')
#   readTable_r1_2$umi[i] <- paste(temp[c(53:60)], collapse='')
# }
# 
# head(readTable_r1_2)

# 3. Identified the most abundant cell barcodes (total reads >= 10) and parsed both the R1_2 and R2 dataframes using this index. Command:

# temp <- table(readTable_r1_2$cell)
# temp <- names(temp)[which(temp >= 10)] ## 4640 cells
# ind <- which(readTable_r1_2$cell %in% temp)
# 
# readTable_r1_3 <- readTable_r1_2[ind, ] ## dim = 62040 x 2
# readTable_r2_2_t <- readTable_r2$x[ind] ## dim = 62040 x 1
# 
# 
# 
# ## R2 
# temp <- table(readTable_r2_2$cell)
# temp <- names(temp)[which(temp >= 10)] ## 4640 cells
# ind <- which(readTable_r2_2$cell %in% temp)
# 
# readTable_r2_3 <- readTable_r2_2[ind, ] ## dim = 62040 x 2
# readTable_r2_2 <- readTable_r2$x[ind] ## dim = 62040 x 1

# 4. Identified reads in R2_2 dataframe including either MULTI-seq Bar1 or Bar2 sequences, combined indexed R1_3 and R2_2 into a final readTable object. Command:

# ind <- c(grep("GGAGAAGA", readTable_r2_2_t),grep("CCACAATG", readTable_r2_2_t))
# readTable_final <- as.data.frame(matrix(0L, nrow=length(ind), ncol=3)) ## dim = 3409 x 3
# colnames(readTable_final) <- c("Cell","UMI","Sample")
# readTable_final[,c("Cell","UMI")] <- readTable_r1_3[ind, ]
# temp <- readTable_r2_2_t[ind]
# readTable_final[grep("GGAGAAGA", temp),'Sample'] <- "GGAGAAGA"
# readTable_final[grep("CCACAATG", temp),'Sample'] <- "CCACAATG"

# 5. Ran MULTIseq.Align on this object and visualized the results. Command:

#barTable_final <- MULTIseq.align(readTable_final, cellIDs=unique(readTable_final$Cell), ref = unique(readTable_final$Sample))

barTable_all <- read.csv("C:/Users/Cathal/Downloads/bar_table_full.csv")

barTable_all <- select(barTable_all,-contains("nUMI"))
barTable_all <- select(barTable_all,-contains("label"))

head(barTable_all)

##   Bar1  Bar2 Cell_Index
## 1   99  1266     816010
## 2   79   706     514137
## 3  322 15551      74134
## 4  295 10550     846872
## 5   22   813     679912
## 6    3     2     223365

p <- ggplot(barTable_all, aes(x=Bar1, y=Bar2)) + geom_point(size=1) + theme_classic()
p

Construct Seurat / SCE object

##########################################################################
######################## Make SC obj #####################################
##########################################################################

# SCE
#sce <- SingleCellExperiment(assays = list(counts = counts_mat))

seurat <- LoadH5Seurat("C:/Users/Cathal/Desktop/Nova_seq/seurat_object.h5seurat")

# Seurat
#seurat <- CreateSeuratObject(counts = counts_mat)


###



## View

#sce

seurat

## An object of class Seurat 
## 427 features across 90522 samples within 1 assay 
## Active assay: RNA (427 features, 0 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap

#head(rownames(sce))

head(rownames(seurat))

## [1] "CCR7-CCR7-AHS0273-pAbO"           "CD11c:B-LY6-ITGAX-AHS0056-pAbO"  
## [3] "CD127-IL7R-AHS0028-pAbO"          "CD134:ACT35-TNFRSF4-AHS0013-pAbO"
## [5] "CD137-TNFRSF9-AHS0003-pAbO"       "CD14:MPHIP9-CD14-AHS0037-pAbO"

Metadata to Seurat object

# join sev bridges data with LMO counts matrix
df_meta_left <- left_join(df_sev_bri_meta, barTable_all, by="Cell_Index")


## Add metadata to seurat object
# seurat <- AddMetaData(object = seurat, metadata = df_meta_left)
# 
# seurat@meta.data$Cell_Type_Experimental <- as.character(df_meta_left$Cell_Type_Experimental)
# seurat@meta.data$Sample_Name <- as.character(df_meta_left$Sample_Name)
# seurat@meta.data$Sample_Tag <- as.character(df_meta_left$Sample_Tag)
# seurat@meta.data$Cell_Index <- as.character(df_meta_left$Cell_Index)
# seurat@meta.data$Bar1 <- as.character(df_meta_left$Bar1)
# seurat@meta.data$Bar2 <- as.character(df_meta_left$Bar2)

Run PCA and TSNE / UMAP and add cluster labels to metadata slot

##########################################################################
####################### Seurat Clustering ################################
##########################################################################



########### Set features as Abseqs for Clustering

Abseqs <- rownames(x = seurat[1:30])
Abseqs

##  [1] "CCR7-CCR7-AHS0273-pAbO"           "CD11c:B-LY6-ITGAX-AHS0056-pAbO"  
##  [3] "CD127-IL7R-AHS0028-pAbO"          "CD134:ACT35-TNFRSF4-AHS0013-pAbO"
##  [5] "CD137-TNFRSF9-AHS0003-pAbO"       "CD14:MPHIP9-CD14-AHS0037-pAbO"   
##  [7] "CD161:HP-3G10-KLRB1-AHS0205-pAbO" "CD16:3G8-FCGR3A-AHS0053-pAbO"    
##  [9] "CD183-CXCR3-AHS0031-pAbO"         "CD196-CCR6-AHS0034-pAbO"         
## [11] "CD19:SJ25C1-CD19-AHS0030-pAbO"    "CD25:2A3-IL2RA-AHS0026-pAbO"     
## [13] "CD272-BTLA-AHS0052-pAbO"          "CD278-ICOS-AHS0012-pAbO"         
## [15] "CD279:EH12-1-PDCD1-AHS0014-pAbO"  "CD27:M-T271-CD27-AHS0025-pAbO"   
## [17] "CD28:L293-CD28-AHS0138-pAbO"      "CD3:UCHT1-CD3E-AHS0231-pAbO"     
## [19] "CD45RA:HI100-PTPRC-AHS0009-pAbO"  "CD4:SK3-CD4-AHS0032-pAbO"        
## [21] "CD56:NCAM16.2-NCAM1-AHS0019-pAbO" "CD62L:DREG-56-SELL-AHS0049-pAbO" 
## [23] "CD8:SK1-CD8A-AHS0228-pAbO"        "CXCR5-CXCR5-AHS0039-pAbO"        
## [25] "CXCR6-CXCR6-AHS0148-pAbO"         "GITR-TNFRSF18-AHS0104-pAbO"      
## [27] "HLA-DR-CD74-AHS0035-pAbO"         "IgD-IGHD-AHS0058-pAbO"           
## [29] "IgM-IGHM-AHS0198-pAbO"            "Tim3-HAVCR2-AHS0016-pAbO"

#all_genes <- rownames(seurat)


# seurat <- ScaleData(seurat)
# 
# seurat <- RunPCA(seurat, npcs = 8, features = Abseqs)
# 
# seurat <- FindNeighbors(seurat, dims = 1:8)
# 
# seurat <- RunTSNE(seurat, features = Abseqs, check_duplicates = FALSE)
# 
# seurat <- RunUMAP(seurat, dims = 1:8)
# 
# seurat <- FindClusters(seurat)



## examine clustering results

print(seurat[["pca"]], dims = 1:5, nfeatures = 5)

## PC_ 1 
## Positive:  CD183-CXCR3-AHS0031-pAbO, CXCR6-CXCR6-AHS0148-pAbO, CD279:EH12-1-PDCD1-AHS0014-pAbO, CD134:ACT35-TNFRSF4-AHS0013-pAbO, CD137-TNFRSF9-AHS0003-pAbO 
## Negative:  CD45RA:HI100-PTPRC-AHS0009-pAbO, HLA-DR-CD74-AHS0035-pAbO, CXCR5-CXCR5-AHS0039-pAbO, CD19:SJ25C1-CD19-AHS0030-pAbO, CD4:SK3-CD4-AHS0032-pAbO 
## PC_ 2 
## Positive:  CD27:M-T271-CD27-AHS0025-pAbO, CD3:UCHT1-CD3E-AHS0231-pAbO, CD8:SK1-CD8A-AHS0228-pAbO, CD134:ACT35-TNFRSF4-AHS0013-pAbO, CD137-TNFRSF9-AHS0003-pAbO 
## Negative:  CD196-CCR6-AHS0034-pAbO, Tim3-HAVCR2-AHS0016-pAbO, IgM-IGHM-AHS0198-pAbO, CD127-IL7R-AHS0028-pAbO, CXCR6-CXCR6-AHS0148-pAbO 
## PC_ 3 
## Positive:  CD3:UCHT1-CD3E-AHS0231-pAbO, CD4:SK3-CD4-AHS0032-pAbO, CD62L:DREG-56-SELL-AHS0049-pAbO, CD27:M-T271-CD27-AHS0025-pAbO, CD278-ICOS-AHS0012-pAbO 
## Negative:  HLA-DR-CD74-AHS0035-pAbO, CD19:SJ25C1-CD19-AHS0030-pAbO, CD11c:B-LY6-ITGAX-AHS0056-pAbO, CXCR5-CXCR5-AHS0039-pAbO, CD137-TNFRSF9-AHS0003-pAbO 
## PC_ 4 
## Positive:  CD11c:B-LY6-ITGAX-AHS0056-pAbO, IgD-IGHD-AHS0058-pAbO, CD14:MPHIP9-CD14-AHS0037-pAbO, CD16:3G8-FCGR3A-AHS0053-pAbO, CD56:NCAM16.2-NCAM1-AHS0019-pAbO 
## Negative:  CD19:SJ25C1-CD19-AHS0030-pAbO, CXCR5-CXCR5-AHS0039-pAbO, CD45RA:HI100-PTPRC-AHS0009-pAbO, CD62L:DREG-56-SELL-AHS0049-pAbO, CD27:M-T271-CD27-AHS0025-pAbO 
## PC_ 5 
## Positive:  CD25:2A3-IL2RA-AHS0026-pAbO, CD11c:B-LY6-ITGAX-AHS0056-pAbO, HLA-DR-CD74-AHS0035-pAbO, CD279:EH12-1-PDCD1-AHS0014-pAbO, IgM-IGHM-AHS0198-pAbO 
## Negative:  CD56:NCAM16.2-NCAM1-AHS0019-pAbO, GITR-TNFRSF18-AHS0104-pAbO, CD45RA:HI100-PTPRC-AHS0009-pAbO, CXCR5-CXCR5-AHS0039-pAbO, IgD-IGHD-AHS0058-pAbO

VizDimLoadings(seurat, dims = 1, reduction = "pca")

VizDimLoadings(seurat, dims = 2, reduction = "pca")

##########################################################################
####################### SCE Clustering ###################################
##########################################################################



## convert Seurat to SCE


# This function can produce some strange behavior so be cautious using it
#sce_conv <- as.SingleCellExperiment(seurat)



## Perform calculations on SCE object
## As per vignette found here -- http://bioconductor.org/books/3.13/OSCA.intro/analysis-overview.html#quick-start-simple


############1
## Feature selection for clustering
# Abseqs
# Abseqs_sce <- rownames(x = sce_conv[1:30])
# Abseqs_sce
# 
# ## all rows
# #all_genes <- rownames(seurat_sce)
# 
# 
# ############2
# ## Run PCA
# # subset_row is an important parameter here as it chooses what to cluster on.
# set.seed(1234)
# sce_conv <- runPCA(sce_conv, ncomponents = 8, subset_row=Abseqs_sce)
# 
# 
# #3
# # Clustering.
# library(bluster) # https://bioconductor.org/packages/devel/bioc/vignettes/bluster/inst/doc/clusterRows.html#introduction
# colLabels(sce_conv) <- clusterCells(sce_conv, use.dimred='PCA',
#     BLUSPARAM=NNGraphParam(cluster.fun="louvain"))
# 
# 
# # 4
# # tSNE / UMAP
# sce_conv <- runTSNE(sce_conv, dimred = 'PCA')
# #sce <- runUMAP(sce, dimred = 'PCA')

tSNE & UMAP. Immune classification file from SevenBridges labelling clusters.

# Use SevenBridges Immune Classification file as clusters

seurat <- SetIdent(seurat, value = 'Cell_Type_Experimental')

DimPlot(seurat, reduction = 'tsne')

DimPlot(seurat, reduction = 'umap')

Number of cells per cell type

table(seurat@meta.data$Cell_Type_Experimental)

## 
##                     B             Dendritic    Monocyte_classical 
##                 10054                   143                  2072 
## Monocyte_nonclassical        Natural_killer          T_CD4_memory 
##                   651                 10681                 21909 
##           T_CD4_naive          T_CD8_memory           T_CD8_naive 
##                 15784                 22059                  6884 
##         T_gamma_delta 
##                   285

tSNE - Sub-setting based on SMK’s and LMO’s.

##########################################################################
####################### Subset SMK & LMO's and plot ######################
##########################################################################


##### Note: Use Seurat object for this part

## Sub-setting based on SMK and LMO and generating plots


## Method 1

#LMO_1_cells_test <- WhichCells(seurat, idents = Cell_Type_Experimental == 'B' & Sample_Name == 'P1')


## Mean value in Bar1 column = 13
## Mean value in Bar2 column = 16


## P1
#cond 1
LMO_1_cells_P1 <- WhichCells(seurat, expression = Bar1 >= 13 & Sample_Name == 'P1')
# cond 2
LMO_2_cells_P1 <- WhichCells(seurat, expression = Bar2 >= 16 & Sample_Name == 'P1')


DimPlot(seurat, reduction = "tsne",
        cells.highlight = list(LMO_1_cells_P1, LMO_2_cells_P1),
        cols.highlight = c("darkblue", "darkorange"), cols = "grey", sizes.highlight = 0.9) +
    ggtitle("SMK 2 - P1") + theme_bw() + theme(legend.position = 'top')

# 
# 
# ### P2 
# cond 1
LMO_1_cells_P2 <- WhichCells(seurat, expression = Bar1 >= 13 & Sample_Name == 'P2')
# cond 2
LMO_2_cells_P2 <- WhichCells(seurat, expression = Bar2 >= 16 & Sample_Name == 'P2')

DimPlot(seurat, reduction = "tsne",
        cells.highlight = list(LMO_1_cells_P2, LMO_2_cells_P2),
        cols.highlight = c("darkblue", "darkorange"), cols = "grey", sizes.highlight = 0.9) +
    ggtitle("SMK 3 - P2") + theme_bw() + theme(legend.position = 'top')

# 
# 
# 
# 
# ### P3 
# # cond 1
LMO_1_cells_P3 <- WhichCells(seurat, expression = Bar1 >= 13 & Sample_Name == 'P3')
# cond 2
LMO_2_cells_P3 <- WhichCells(seurat, expression = Bar2 >= 16 & Sample_Name == 'P3')

DimPlot(seurat, reduction = "tsne",
        cells.highlight = list(LMO_1_cells_P3, LMO_2_cells_P3),
        cols.highlight = c("darkblue", "darkorange"), cols = "grey", sizes.highlight = 0.9) +
    ggtitle("SMK 4 - P3") + theme_bw() + theme(legend.position = 'top')

# 
# 
# 
# 
### P4
# cond 1
LMO_1_cells_P4 <- WhichCells(seurat, expression = Bar1 >= 13 & Sample_Name == 'P4')
# cond 2
LMO_2_cells_P4 <- WhichCells(seurat, expression = Bar2 >= 16 & Sample_Name == 'P4')

DimPlot(seurat, reduction = "tsne",
        cells.highlight = list(LMO_1_cells_P4, LMO_2_cells_P4),
        cols.highlight = c("darkblue", "darkorange"), cols = "grey", sizes.highlight = 0.9) +
    ggtitle("SMK 5 - P4") + theme_bw() + theme(legend.position = 'top')

# 
# 
# ### P5 
# # cond 1
LMO_1_cells_P5 <- WhichCells(seurat, expression = Bar1 >= 13 & Sample_Name == 'P5')
# cond 2
LMO_2_cells_P5 <- WhichCells(seurat, expression = Bar2 >= 16 & Sample_Name == 'P5')

DimPlot(seurat, reduction = "tsne",
        cells.highlight = list(LMO_1_cells_P5, LMO_2_cells_P5),
        cols.highlight = c("darkblue", "darkorange"), cols = "grey", sizes.highlight = 0.9) +
    ggtitle("SMK 6 - P5") + theme_bw() + theme(legend.position = 'top')

######################################################################################################
######################################################################################################

Dotplots

##########################################################################
############################ Dotplots ####################################
##########################################################################

########## RNA Dotplot


rna_list <- c('CD3E',
              'CD4',
              'CD8A',
              'MS4A1',
              'CD14',
              'FCGR3A',
              'ITGAM',
              'CD5',
              'FCER2',
              'IL2RA',
              'CD69',
              'CD22',
              'CD79B')


DotPlot(seurat, features = rna_list, group.by = 'Cell_Type_Experimental') + RotatedAxis()

######## Abseq dotplot


# Use same Idents from above on y-axis

DotPlot(seurat, features = Abseqs, group.by = 'Cell_Type_Experimental') + RotatedAxis()