Nova-seq analysis - 30k cells run

path = “C:/Users/Cathal/Documents/Nova_30k/”

Flowchart

Workflow Flowchart.

##########################################################################
####################### Load relevant libraries ##########################
##########################################################################

library(dplyr)
library(tidyr)
library(deMULTIplex)
library(ggplot2)
library(stringr)
library(Seurat)
library(SeuratObject)
library(SingleCellExperiment)
library(scater)
library(scran)
library(tibble)
library(sjmisc)
## read in and filter away Undetermined

counts_mat <- read.csv("C:/Users/Cathal/Documents/Nova_30k/Combined_Nova-LMOfilteredOUT-30kset-_RSEC_MolsPerCell.csv", skip = 7)
# 1 - SMK Calls file
smk_calls <- read.csv(file = "C:/Users/Cathal/Documents/Nova_30k/Nova-LMOfilteredOUT-30kset-_Sample_Tag_Calls.csv", skip=7)
head(smk_calls)
##   Cell_Index     Sample_Tag Sample_Name
## 1     513638 SampleTag06_hs          P5
## 2     471371 SampleTag06_hs          P5
## 3     442215 SampleTag06_hs          P5
## 4     159630 SampleTag06_hs          P5
## 5     295662 SampleTag06_hs          P5
## 6      46560 SampleTag06_hs          P5
# 2 - Immune Classification cell type file
Immune_class <- read.csv(file = "C:/Users/Cathal/Documents/Nova_30k/Combined_Nova-LMOfilteredOUT-30kset-_cell_type_experimental.csv")
head(Immune_class)
##   Cell_Index Cell_Type_Experimental
## 1     513638            T_CD8_naive
## 2     471371           T_CD4_memory
## 3     442215            T_CD8_naive
## 4     159630            T_CD8_naive
## 5     295662            T_CD4_naive
## 6      46560  Monocyte_nonclassical
#### merge SMK calls file and Immune class file
df_meta_SevenBridges <- full_join(smk_calls, Immune_class, by = "Cell_Index")
head(df_meta_SevenBridges)
##   Cell_Index     Sample_Tag Sample_Name Cell_Type_Experimental
## 1     513638 SampleTag06_hs          P5            T_CD8_naive
## 2     471371 SampleTag06_hs          P5           T_CD4_memory
## 3     442215 SampleTag06_hs          P5            T_CD8_naive
## 4     159630 SampleTag06_hs          P5            T_CD8_naive
## 5     295662 SampleTag06_hs          P5            T_CD4_naive
## 6      46560 SampleTag06_hs          P5  Monocyte_nonclassical
## merge meta to count mat
count_mat_full <- full_join(counts_mat, df_meta_SevenBridges, by = "Cell_Index")

## remove rows that contain "Undetermined" or "Multiplet"
count_mat_filtered <- count_mat_full[!(count_mat_full$Sample_Name=="Undetermined" | count_mat_full$Sample_Name=="Multiplet"),]

## reduced count mat 
count_mat_filtered_only_counts <- count_mat_filtered[,1:428]

## reduced meta data
SB_meta_filtered <- select(count_mat_filtered, Cell_Index, Sample_Tag:Cell_Type_Experimental)
# count_mat_filtered[,429:431]

## add LMO bartable to BD metadata
## Read in LMO barTable
barTable_all <- read.csv("C:/Users/Cathal/Downloads/bar_table_full.csv")
head(barTable_all)
##                         label nUMI_total Bar1  Bar2  nUMI Cell_Index
## 1 CAAGCAAGCTTGGTATGACAAGTAGAA       1416   99  1266  1365     816010
## 2 CAGCAAGATAAGGACTATAAAGAGGCC        820   79   706   785     514137
## 3 AAGACATGCCACCCAAAGACGCTGTTG      23080  322 15551 15873      74134
## 4 GAGCCAATACAGCACTTCCAATACAAG      12892  295 10550 10845     846872
## 5 ACAAGGATCAGGTTCGCTGTTAGCCTA        880   22   813   835     679912
## 6 TGGCTCAGAACGACCACCAGCCGCAAG          5    3     2     5     223365
# reduce
barTable_all <- select(barTable_all,-contains("nUMI"))
barTable_all <- select(barTable_all,-contains("label"))
head(barTable_all)
##   Bar1  Bar2 Cell_Index
## 1   99  1266     816010
## 2   79   706     514137
## 3  322 15551      74134
## 4  295 10550     846872
## 5   22   813     679912
## 6    3     2     223365
## Join sev bridges data with LMO barTable
#dim(df_meta_SevenBridges)
dim(barTable_all)
## [1] 228961      3
##
df_meta_all <- left_join(SB_meta_filtered, barTable_all, by="Cell_Index")


##########

## count matrix
## set rownames
rownames(count_mat_filtered_only_counts) <- count_mat_filtered_only_counts$Cell_Index
count_mat_filtered_only_counts$Cell_Index <- NULL
# check size of mat
dim(count_mat_filtered_only_counts)
## [1] 6671  427
# View top of mat
count_mat_filtered_only_counts[1:5,1:5]
##        CCR7.CCR7.AHS0273.pAbO CD11c.B.LY6.ITGAX.AHS0056.pAbO
## 513638                    336                            247
## 471371                    130                            102
## 442215                    296                            239
## 159630                    462                            193
## 295662                    237                           1448
##        CD127.IL7R.AHS0028.pAbO CD134.ACT35.TNFRSF4.AHS0013.pAbO
## 513638                     171                              446
## 471371                      98                              177
## 442215                      67                              170
## 159630                     169                              254
## 295662                     100                              313
##        CD137.TNFRSF9.AHS0003.pAbO
## 513638                        365
## 471371                        162
## 442215                        220
## 159630                        368
## 295662                        355
## tt - transpose
count_mat_filtered_only_counts_tt <- rotate_df(count_mat_filtered_only_counts)

## Ensure that cells and genes are in the correct position for Seurat
head(colnames(count_mat_filtered_only_counts_tt))
## [1] "513638" "471371" "442215" "159630" "295662" "46560"
dim(count_mat_filtered_only_counts_tt)
## [1]  427 6671
## read in as is

# counts_mat <- read.csv("C:/Users/Cathal/Documents/Nova_30k/Combined_Nova-LMOfilteredOUT-30kset-_RSEC_MolsPerCell.csv", skip = 7)
# 
# ## set rownames
# rownames(counts_mat) <- counts_mat$Cell_Index
# counts_mat$Cell_Index <- NULL
# # check size of mat
# dim(counts_mat)
# # View top of mat
# counts_mat[1:5,1:5]
# ## tt - transpose
# counts_mat_tt <- rotate_df(counts_mat)
# 
# ## Ensure that cells and genes are in the correct position for Seurat
# head(colnames(counts_mat_tt))
# dim(counts_mat_tt)

Construct Seurat / SCE object

##########################################################################
######################## Make SC obj #####################################
##########################################################################


# Seurat Filtered
seurat_filtered <- CreateSeuratObject(counts = count_mat_filtered_only_counts_tt)
## View
seurat_filtered
## An object of class Seurat 
## 427 features across 6671 samples within 1 assay 
## Active assay: RNA (427 features, 0 variable features)
head(rownames(seurat_filtered))
## [1] "CCR7.CCR7.AHS0273.pAbO"           "CD11c.B.LY6.ITGAX.AHS0056.pAbO"  
## [3] "CD127.IL7R.AHS0028.pAbO"          "CD134.ACT35.TNFRSF4.AHS0013.pAbO"
## [5] "CD137.TNFRSF9.AHS0003.pAbO"       "CD14.MPHIP9.CD14.AHS0037.pAbO"
# Seurat
# seurat <- CreateSeuratObject(counts = counts_mat_tt)
# ## View
# seurat
# head(rownames(seurat))

Add Metadata to Seurat object - filtered (no Undet) object

#
seurat_filtered <- AddMetaData(object = seurat_filtered, metadata = df_meta_all)
#
seurat_filtered@meta.data$Cell_Index <- as.character(df_meta_all$Cell_Index)
seurat_filtered@meta.data$Sample_Tag <- as.character(df_meta_all$Sample_Tag)
seurat_filtered@meta.data$Sample_Name <- as.character(df_meta_all$Sample_Name)
seurat_filtered@meta.data$Cell_Type_Experimental <- as.character(df_meta_all$Cell_Type_Experimental)
seurat_filtered@meta.data$Bar1 <- as.character(df_meta_all$Bar1)
seurat_filtered@meta.data$Bar2 <- as.character(df_meta_all$Bar2)
#
head(seurat_filtered)
##           orig.ident nCount_RNA nFeature_RNA Cell_Index     Sample_Tag
## 513638 SeuratProject      20892          203     513638 SampleTag06_hs
## 471371 SeuratProject       9722          172     471371 SampleTag06_hs
## 442215 SeuratProject      13152          205     442215 SampleTag06_hs
## 159630 SeuratProject      16423          185     159630 SampleTag06_hs
## 295662 SeuratProject      28206          190     295662 SampleTag06_hs
## 46560  SeuratProject      24229          142      46560 SampleTag06_hs
## 540373 SeuratProject      17587          222     540373 SampleTag06_hs
## 841489 SeuratProject       6958          130     841489 SampleTag03_hs
## 859743 SeuratProject      19467          187     859743 SampleTag06_hs
## 672798 SeuratProject       6356          147     672798 SampleTag06_hs
##        Sample_Name Cell_Type_Experimental Bar1 Bar2
## 513638          P5            T_CD8_naive   21  119
## 471371          P5           T_CD4_memory   15   49
## 442215          P5            T_CD8_naive   31  335
## 159630          P5            T_CD8_naive   16  116
## 295662          P5            T_CD4_naive   18  126
## 46560           P5  Monocyte_nonclassical    5   56
## 540373          P5         Natural_killer   36  227
## 841489          P2         Natural_killer    5    6
## 859743          P5            T_CD8_naive   14   98
## 672798          P5           T_CD8_memory   42   82

QC Plots - seurat filtered (no Undet)

##########################################################################
################################ QC ######################################
##########################################################################


# nFeature_RNA is the number of genes detected in each cell. nCount_RNA is the total number of molecules detected within a cell

# Visualize QC metrics as a violin plot
VlnPlot(seurat_filtered, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2)

# scatter
plot <- FeatureScatter(seurat_filtered, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot

Subset for lower and Upper groups on nFeature plot

seurat_filtered_lower <- subset(seurat_filtered, subset = nFeature_RNA < 50)
VlnPlot(seurat_filtered_lower, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2)

seurat_filtered_Upper <- subset(seurat_filtered, subset = nFeature_RNA > 50)
VlnPlot(seurat_filtered_Upper, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2)

dim(seurat_filtered_lower)
## [1]  427 3922
dim(seurat_filtered_Upper)
## [1]  427 2731

Add Metadata to Seurat object (as is)

# # 1 - SMK Calls file
# smk_calls <- read.csv(file = "C:/Users/Cathal/Documents/Nova_30k/Nova-LMOfilteredOUT-30kset-_Sample_Tag_Calls.csv", skip=7)
# head(smk_calls)
# 
# ####################################
# # 2 - Immune Classification cell type file
# Immune_class <- read.csv(file = "C:/Users/Cathal/Documents/Nova_30k/Combined_Nova-LMOfilteredOUT-30kset-_cell_type_experimental.csv")
# head(Immune_class)
# 
# #### merge SMK calls file and Immune class file
# df_meta_SevenBridges <- full_join(smk_calls, Immune_class, by = "Cell_Index")
# head(df_meta_SevenBridges)
# 
# ## Read in LMO barTable
# barTable_all <- read.csv("C:/Users/Cathal/Downloads/bar_table_full.csv")
# head(barTable_all)
# # reduce
# barTable_all <- select(barTable_all,-contains("nUMI"))
# barTable_all <- select(barTable_all,-contains("label"))
# 
# head(barTable_all)
# 
# ## Join sev bridges data with LMO barTable
# 
# dim(df_meta_SevenBridges)
# dim(barTable_all)
# ##
# df_meta_all <- left_join(df_meta_SevenBridges, barTable_all, by="Cell_Index")
# 
# head(df_meta_all)
# 
# # meta-data reduced
# 
# ## 
# # df_meta <- read.csv("C:/Users/Cathal/Desktop/Nova_seq/Nova_Seq_reduced/df_metadata_reduced.csv")[ ,c('Cell_Index', 'Sample_Tag', 'Sample_Name', 'Cell_Type_Experimental')]
# # 
# # head(df_meta)
# 
# # Add metadata to seurat object
# seurat <- AddMetaData(object = seurat, metadata = df_meta_all)
# ## as.character()
# seurat@meta.data$Cell_Type_Experimental <- as.character(df_meta_all$Cell_Type_Experimental)
# seurat@meta.data$Sample_Name <- as.character(df_meta_all$Sample_Name)
# seurat@meta.data$Sample_Tag <- as.character(df_meta_all$Sample_Tag)
# seurat@meta.data$Cell_Index <- as.character(df_meta_all$Cell_Index)
# seurat@meta.data$Bar1 <- as.character(df_meta_all$Bar1)
# seurat@meta.data$Bar2 <- as.character(df_meta_all$Bar2)
# # 
# head(seurat)

Normalizing the data

##########################################################################
####################### Seurat Normalise #################################
##########################################################################

seurat_filtered <- NormalizeData(object = seurat_filtered, normalization.method = "LogNormalize")
seurat_filtered_lower <- NormalizeData(object = seurat_filtered_lower, normalization.method = "LogNormalize")
seurat_filtered_Upper <- NormalizeData(object = seurat_filtered_Upper, normalization.method = "LogNormalize")

seurat filtered - Run PCA and TSNE / UMAP

## Lower

########### Set features as Abseqs for Clustering
Abseqs <- rownames(x = seurat_filtered[1:30])
Abseqs
##  [1] "CCR7.CCR7.AHS0273.pAbO"           "CD11c.B.LY6.ITGAX.AHS0056.pAbO"  
##  [3] "CD127.IL7R.AHS0028.pAbO"          "CD134.ACT35.TNFRSF4.AHS0013.pAbO"
##  [5] "CD137.TNFRSF9.AHS0003.pAbO"       "CD14.MPHIP9.CD14.AHS0037.pAbO"   
##  [7] "CD161.HP.3G10.KLRB1.AHS0205.pAbO" "CD16.3G8.FCGR3A.AHS0053.pAbO"    
##  [9] "CD183.CXCR3.AHS0031.pAbO"         "CD196.CCR6.AHS0034.pAbO"         
## [11] "CD19.SJ25C1.CD19.AHS0030.pAbO"    "CD25.2A3.IL2RA.AHS0026.pAbO"     
## [13] "CD272.BTLA.AHS0052.pAbO"          "CD278.ICOS.AHS0012.pAbO"         
## [15] "CD279.EH12.1.PDCD1.AHS0014.pAbO"  "CD27.M.T271.CD27.AHS0025.pAbO"   
## [17] "CD28.L293.CD28.AHS0138.pAbO"      "CD3.UCHT1.CD3E.AHS0231.pAbO"     
## [19] "CD45RA.HI100.PTPRC.AHS0009.pAbO"  "CD4.SK3.CD4.AHS0032.pAbO"        
## [21] "CD56.NCAM16.2.NCAM1.AHS0019.pAbO" "CD62L.DREG.56.SELL.AHS0049.pAbO" 
## [23] "CD8.SK1.CD8A.AHS0228.pAbO"        "CXCR5.CXCR5.AHS0039.pAbO"        
## [25] "CXCR6.CXCR6.AHS0148.pAbO"         "GITR.TNFRSF18.AHS0104.pAbO"      
## [27] "HLA.DR.CD74.AHS0035.pAbO"         "IgD.IGHD.AHS0058.pAbO"           
## [29] "IgM.IGHM.AHS0198.pAbO"            "Tim3.HAVCR2.AHS0016.pAbO"
#all_genes <- rownames(seurat)

### Scale
seurat_filtered <- ScaleData(seurat_filtered)
### Run PCA with no of PC'S not specified, then check the Elbow plot. Re-run with the inflection number.
##seurat <- RunPCA(seurat, features = Abseqs)
#ElbowPlot(seurat)

### PCA
seurat_filtered <- RunPCA(seurat_filtered, features = Abseqs, npcs = 8)
### Find Neighbours
seurat_filtered <- FindNeighbors(seurat_filtered, dims = 1:8, k.param = 8)
### FindClusters
seurat_filtered <- FindClusters(seurat_filtered, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 6671
## Number of edges: 145249
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8552
## Number of communities: 10
## Elapsed time: 0 seconds
### Run TSNE
seurat_filtered <- RunTSNE(seurat_filtered, features = Abseqs, check_duplicates = FALSE)
### Run UMAP
seurat_filtered <- RunUMAP(seurat_filtered, dims = 1:8)

## examine clustering results

print(seurat_filtered[["pca"]], dims = 1:5, nfeatures = 5)
## PC_ 1 
## Positive:  CD183.CXCR3.AHS0031.pAbO, CXCR6.CXCR6.AHS0148.pAbO, CD161.HP.3G10.KLRB1.AHS0205.pAbO, CD279.EH12.1.PDCD1.AHS0014.pAbO, CD127.IL7R.AHS0028.pAbO 
## Negative:  CD45RA.HI100.PTPRC.AHS0009.pAbO, HLA.DR.CD74.AHS0035.pAbO, CXCR5.CXCR5.AHS0039.pAbO, CD19.SJ25C1.CD19.AHS0030.pAbO, CD4.SK3.CD4.AHS0032.pAbO 
## PC_ 2 
## Positive:  CD3.UCHT1.CD3E.AHS0231.pAbO, CD4.SK3.CD4.AHS0032.pAbO, CD27.M.T271.CD27.AHS0025.pAbO, CD28.L293.CD28.AHS0138.pAbO, CD62L.DREG.56.SELL.AHS0049.pAbO 
## Negative:  HLA.DR.CD74.AHS0035.pAbO, CD19.SJ25C1.CD19.AHS0030.pAbO, CXCR5.CXCR5.AHS0039.pAbO, CD196.CCR6.AHS0034.pAbO, IgD.IGHD.AHS0058.pAbO 
## PC_ 3 
## Positive:  CD19.SJ25C1.CD19.AHS0030.pAbO, CXCR5.CXCR5.AHS0039.pAbO, CD27.M.T271.CD27.AHS0025.pAbO, CD25.2A3.IL2RA.AHS0026.pAbO, CD272.BTLA.AHS0052.pAbO 
## Negative:  CD11c.B.LY6.ITGAX.AHS0056.pAbO, CD14.MPHIP9.CD14.AHS0037.pAbO, Tim3.HAVCR2.AHS0016.pAbO, CD16.3G8.FCGR3A.AHS0053.pAbO, CD56.NCAM16.2.NCAM1.AHS0019.pAbO 
## PC_ 4 
## Positive:  CD8.SK1.CD8A.AHS0228.pAbO, CD11c.B.LY6.ITGAX.AHS0056.pAbO, CD134.ACT35.TNFRSF4.AHS0013.pAbO, CD137.TNFRSF9.AHS0003.pAbO, CD14.MPHIP9.CD14.AHS0037.pAbO 
## Negative:  CD196.CCR6.AHS0034.pAbO, CD278.ICOS.AHS0012.pAbO, Tim3.HAVCR2.AHS0016.pAbO, CD62L.DREG.56.SELL.AHS0049.pAbO, IgM.IGHM.AHS0198.pAbO 
## PC_ 5 
## Positive:  CD8.SK1.CD8A.AHS0228.pAbO, CD45RA.HI100.PTPRC.AHS0009.pAbO, CD56.NCAM16.2.NCAM1.AHS0019.pAbO, CD278.ICOS.AHS0012.pAbO, CD3.UCHT1.CD3E.AHS0231.pAbO 
## Negative:  CD4.SK3.CD4.AHS0032.pAbO, CD62L.DREG.56.SELL.AHS0049.pAbO, HLA.DR.CD74.AHS0035.pAbO, CD14.MPHIP9.CD14.AHS0037.pAbO, CD11c.B.LY6.ITGAX.AHS0056.pAbO
VizDimLoadings(seurat_filtered, dims = 1, reduction = "pca")

VizDimLoadings(seurat_filtered, dims = 2, reduction = "pca")

seurat_filtered
## An object of class Seurat 
## 427 features across 6671 samples within 1 assay 
## Active assay: RNA (427 features, 0 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
seurat_filtered[["pca"]]
## A dimensional reduction object with key PC_ 
##  Number of dimensions: 8 
##  Projected dimensional reduction calculated:  FALSE 
##  Jackstraw run: FALSE 
##  Computed using assay: RNA
seurat_filtered[["tsne"]]
## A dimensional reduction object with key tSNE_ 
##  Number of dimensions: 2 
##  Projected dimensional reduction calculated:  FALSE 
##  Jackstraw run: FALSE 
##  Computed using assay: RNA
head(Loadings(seurat_filtered, reduction = "pca")[])
##                                       PC_1         PC_2        PC_3       PC_4
## CCR7.CCR7.AHS0273.pAbO           0.1938009  0.000156435  0.25585569  0.1621675
## CD11c.B.LY6.ITGAX.AHS0056.pAbO   0.1588811 -0.029440623 -0.33258170  0.3090266
## CD127.IL7R.AHS0028.pAbO          0.2388517  0.001930392 -0.01274199 -0.2005014
## CD134.ACT35.TNFRSF4.AHS0013.pAbO 0.2286413  0.050433417  0.10605091  0.2955924
## CD137.TNFRSF9.AHS0003.pAbO       0.2316507  0.023350995  0.07024575  0.2936611
## CD14.MPHIP9.CD14.AHS0037.pAbO    0.1589011 -0.017536423 -0.30707171  0.2602483
##                                         PC_5          PC_6        PC_7
## CCR7.CCR7.AHS0273.pAbO           -0.12714736 -0.1448861522 -0.07032805
## CD11c.B.LY6.ITGAX.AHS0056.pAbO   -0.20566700 -0.0639695635  0.05295148
## CD127.IL7R.AHS0028.pAbO           0.02841255  0.0355902584 -0.05831701
## CD134.ACT35.TNFRSF4.AHS0013.pAbO -0.04058188 -0.0001901695  0.05994144
## CD137.TNFRSF9.AHS0003.pAbO       -0.04332384 -0.0014217929  0.03931081
## CD14.MPHIP9.CD14.AHS0037.pAbO    -0.23150702 -0.1547029168  0.13282052
##                                         PC_8
## CCR7.CCR7.AHS0273.pAbO           -0.21181455
## CD11c.B.LY6.ITGAX.AHS0056.pAbO    0.04714327
## CD127.IL7R.AHS0028.pAbO          -0.02280638
## CD134.ACT35.TNFRSF4.AHS0013.pAbO -0.04807316
## CD137.TNFRSF9.AHS0003.pAbO        0.01941131
## CD14.MPHIP9.CD14.AHS0037.pAbO    -0.06147973
DimPlot(seurat_filtered, reduction = 'tsne') + ggtitle("tSNE filtered")

DimPlot(seurat_filtered, reduction = 'umap') + ggtitle("UMAP filtered")

Lower - Run PCA and TSNE / UMAP

## Lower

########### Set features as Abseqs for Clustering
Abseqs <- rownames(x = seurat_filtered_lower[1:30])
Abseqs
##  [1] "CCR7.CCR7.AHS0273.pAbO"           "CD11c.B.LY6.ITGAX.AHS0056.pAbO"  
##  [3] "CD127.IL7R.AHS0028.pAbO"          "CD134.ACT35.TNFRSF4.AHS0013.pAbO"
##  [5] "CD137.TNFRSF9.AHS0003.pAbO"       "CD14.MPHIP9.CD14.AHS0037.pAbO"   
##  [7] "CD161.HP.3G10.KLRB1.AHS0205.pAbO" "CD16.3G8.FCGR3A.AHS0053.pAbO"    
##  [9] "CD183.CXCR3.AHS0031.pAbO"         "CD196.CCR6.AHS0034.pAbO"         
## [11] "CD19.SJ25C1.CD19.AHS0030.pAbO"    "CD25.2A3.IL2RA.AHS0026.pAbO"     
## [13] "CD272.BTLA.AHS0052.pAbO"          "CD278.ICOS.AHS0012.pAbO"         
## [15] "CD279.EH12.1.PDCD1.AHS0014.pAbO"  "CD27.M.T271.CD27.AHS0025.pAbO"   
## [17] "CD28.L293.CD28.AHS0138.pAbO"      "CD3.UCHT1.CD3E.AHS0231.pAbO"     
## [19] "CD45RA.HI100.PTPRC.AHS0009.pAbO"  "CD4.SK3.CD4.AHS0032.pAbO"        
## [21] "CD56.NCAM16.2.NCAM1.AHS0019.pAbO" "CD62L.DREG.56.SELL.AHS0049.pAbO" 
## [23] "CD8.SK1.CD8A.AHS0228.pAbO"        "CXCR5.CXCR5.AHS0039.pAbO"        
## [25] "CXCR6.CXCR6.AHS0148.pAbO"         "GITR.TNFRSF18.AHS0104.pAbO"      
## [27] "HLA.DR.CD74.AHS0035.pAbO"         "IgD.IGHD.AHS0058.pAbO"           
## [29] "IgM.IGHM.AHS0198.pAbO"            "Tim3.HAVCR2.AHS0016.pAbO"
#all_genes <- rownames(seurat)

### Scale
seurat_filtered_lower <- ScaleData(seurat_filtered_lower)
### Run PCA with no of PC'S not specified, then check the Elbow plot. Re-run with the inflection number.
##seurat <- RunPCA(seurat, features = Abseqs)
#ElbowPlot(seurat)

### PCA
seurat_filtered_lower <- RunPCA(seurat_filtered_lower, features = Abseqs, npcs = 8)
### Find Neighbours
seurat_filtered_lower <- FindNeighbors(seurat_filtered_lower, dims = 1:8, k.param = 8)
### FindClusters
seurat_filtered_lower <- FindClusters(seurat_filtered_lower, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3922
## Number of edges: 89286
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7777
## Number of communities: 9
## Elapsed time: 0 seconds
### Run TSNE
seurat_filtered_lower <- RunTSNE(seurat_filtered_lower, features = Abseqs, check_duplicates = FALSE)
### Run UMAP
seurat_filtered_lower <- RunUMAP(seurat_filtered_lower, dims = 1:8)

## examine clustering results

print(seurat_filtered_lower[["pca"]], dims = 1:5, nfeatures = 5)
## PC_ 1 
## Positive:  CD134.ACT35.TNFRSF4.AHS0013.pAbO, CD137.TNFRSF9.AHS0003.pAbO, CD28.L293.CD28.AHS0138.pAbO, CD279.EH12.1.PDCD1.AHS0014.pAbO, CD27.M.T271.CD27.AHS0025.pAbO 
## Negative:  HLA.DR.CD74.AHS0035.pAbO, Tim3.HAVCR2.AHS0016.pAbO, CD196.CCR6.AHS0034.pAbO, CD19.SJ25C1.CD19.AHS0030.pAbO, CD45RA.HI100.PTPRC.AHS0009.pAbO 
## PC_ 2 
## Positive:  CD11c.B.LY6.ITGAX.AHS0056.pAbO, CD27.M.T271.CD27.AHS0025.pAbO, HLA.DR.CD74.AHS0035.pAbO, CD8.SK1.CD8A.AHS0228.pAbO, CD134.ACT35.TNFRSF4.AHS0013.pAbO 
## Negative:  CD196.CCR6.AHS0034.pAbO, CXCR6.CXCR6.AHS0148.pAbO, CD127.IL7R.AHS0028.pAbO, IgM.IGHM.AHS0198.pAbO, Tim3.HAVCR2.AHS0016.pAbO 
## PC_ 3 
## Positive:  CXCR5.CXCR5.AHS0039.pAbO, CD19.SJ25C1.CD19.AHS0030.pAbO, CD45RA.HI100.PTPRC.AHS0009.pAbO, HLA.DR.CD74.AHS0035.pAbO, CCR7.CCR7.AHS0273.pAbO 
## Negative:  CD4.SK3.CD4.AHS0032.pAbO, CD3.UCHT1.CD3E.AHS0231.pAbO, CD11c.B.LY6.ITGAX.AHS0056.pAbO, Tim3.HAVCR2.AHS0016.pAbO, IgM.IGHM.AHS0198.pAbO 
## PC_ 4 
## Positive:  CD4.SK3.CD4.AHS0032.pAbO, CD3.UCHT1.CD3E.AHS0231.pAbO, CD45RA.HI100.PTPRC.AHS0009.pAbO, CD27.M.T271.CD27.AHS0025.pAbO, CD19.SJ25C1.CD19.AHS0030.pAbO 
## Negative:  CD14.MPHIP9.CD14.AHS0037.pAbO, CD11c.B.LY6.ITGAX.AHS0056.pAbO, GITR.TNFRSF18.AHS0104.pAbO, HLA.DR.CD74.AHS0035.pAbO, CD137.TNFRSF9.AHS0003.pAbO 
## PC_ 5 
## Positive:  CD56.NCAM16.2.NCAM1.AHS0019.pAbO, GITR.TNFRSF18.AHS0104.pAbO, CD161.HP.3G10.KLRB1.AHS0205.pAbO, CCR7.CCR7.AHS0273.pAbO, CD45RA.HI100.PTPRC.AHS0009.pAbO 
## Negative:  HLA.DR.CD74.AHS0035.pAbO, CD25.2A3.IL2RA.AHS0026.pAbO, CD279.EH12.1.PDCD1.AHS0014.pAbO, CD11c.B.LY6.ITGAX.AHS0056.pAbO, CD183.CXCR3.AHS0031.pAbO
VizDimLoadings(seurat_filtered_lower, dims = 1, reduction = "pca")

VizDimLoadings(seurat_filtered_lower, dims = 2, reduction = "pca")

seurat_filtered_lower
## An object of class Seurat 
## 427 features across 3922 samples within 1 assay 
## Active assay: RNA (427 features, 0 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
seurat_filtered_lower[["pca"]]
## A dimensional reduction object with key PC_ 
##  Number of dimensions: 8 
##  Projected dimensional reduction calculated:  FALSE 
##  Jackstraw run: FALSE 
##  Computed using assay: RNA
seurat_filtered_lower[["tsne"]]
## A dimensional reduction object with key tSNE_ 
##  Number of dimensions: 2 
##  Projected dimensional reduction calculated:  FALSE 
##  Jackstraw run: FALSE 
##  Computed using assay: RNA
head(Loadings(seurat_filtered_lower, reduction = "pca")[])
##                                        PC_1        PC_2         PC_3
## CCR7.CCR7.AHS0273.pAbO           0.21718548  0.05882817  0.198332280
## CD11c.B.LY6.ITGAX.AHS0056.pAbO   0.17410050  0.17406107 -0.123079397
## CD127.IL7R.AHS0028.pAbO          0.04083493 -0.31596396 -0.061866005
## CD134.ACT35.TNFRSF4.AHS0013.pAbO 0.32777481  0.08815482  0.093739424
## CD137.TNFRSF9.AHS0003.pAbO       0.32344297  0.08332290  0.058471766
## CD14.MPHIP9.CD14.AHS0037.pAbO    0.24301485  0.06125413  0.006062782
##                                           PC_4         PC_5        PC_6
## CCR7.CCR7.AHS0273.pAbO            0.0005803713  0.127346394 -0.47603517
## CD11c.B.LY6.ITGAX.AHS0056.pAbO   -0.2360434610 -0.244623376  0.22486372
## CD127.IL7R.AHS0028.pAbO           0.0476963015 -0.058011682  0.06061241
## CD134.ACT35.TNFRSF4.AHS0013.pAbO -0.0746219350  0.007186154  0.03714226
## CD137.TNFRSF9.AHS0003.pAbO       -0.1309533948 -0.019859797  0.03644272
## CD14.MPHIP9.CD14.AHS0037.pAbO    -0.2852640793  0.071990979  0.28379925
##                                         PC_7         PC_8
## CCR7.CCR7.AHS0273.pAbO           -0.14653354 -0.146645910
## CD11c.B.LY6.ITGAX.AHS0056.pAbO    0.16057141 -0.118384009
## CD127.IL7R.AHS0028.pAbO           0.16468683 -0.028797764
## CD134.ACT35.TNFRSF4.AHS0013.pAbO -0.01126203  0.006922626
## CD137.TNFRSF9.AHS0003.pAbO       -0.01225284 -0.022260682
## CD14.MPHIP9.CD14.AHS0037.pAbO    -0.17390866 -0.024382164
DimPlot(seurat_filtered_lower, reduction = 'tsne') + ggtitle("tSNE Lower group")

DimPlot(seurat_filtered_lower, reduction = 'umap') + ggtitle("UMAP Lower group")

Upper - Run PCA and TSNE / UMAP

##########################################################################
####################### Seurat Clustering ################################
##########################################################################

## Upper

########### Set features as Abseqs for Clustering
Abseqs <- rownames(x = seurat_filtered_Upper[1:30])
Abseqs
##  [1] "CCR7.CCR7.AHS0273.pAbO"           "CD11c.B.LY6.ITGAX.AHS0056.pAbO"  
##  [3] "CD127.IL7R.AHS0028.pAbO"          "CD134.ACT35.TNFRSF4.AHS0013.pAbO"
##  [5] "CD137.TNFRSF9.AHS0003.pAbO"       "CD14.MPHIP9.CD14.AHS0037.pAbO"   
##  [7] "CD161.HP.3G10.KLRB1.AHS0205.pAbO" "CD16.3G8.FCGR3A.AHS0053.pAbO"    
##  [9] "CD183.CXCR3.AHS0031.pAbO"         "CD196.CCR6.AHS0034.pAbO"         
## [11] "CD19.SJ25C1.CD19.AHS0030.pAbO"    "CD25.2A3.IL2RA.AHS0026.pAbO"     
## [13] "CD272.BTLA.AHS0052.pAbO"          "CD278.ICOS.AHS0012.pAbO"         
## [15] "CD279.EH12.1.PDCD1.AHS0014.pAbO"  "CD27.M.T271.CD27.AHS0025.pAbO"   
## [17] "CD28.L293.CD28.AHS0138.pAbO"      "CD3.UCHT1.CD3E.AHS0231.pAbO"     
## [19] "CD45RA.HI100.PTPRC.AHS0009.pAbO"  "CD4.SK3.CD4.AHS0032.pAbO"        
## [21] "CD56.NCAM16.2.NCAM1.AHS0019.pAbO" "CD62L.DREG.56.SELL.AHS0049.pAbO" 
## [23] "CD8.SK1.CD8A.AHS0228.pAbO"        "CXCR5.CXCR5.AHS0039.pAbO"        
## [25] "CXCR6.CXCR6.AHS0148.pAbO"         "GITR.TNFRSF18.AHS0104.pAbO"      
## [27] "HLA.DR.CD74.AHS0035.pAbO"         "IgD.IGHD.AHS0058.pAbO"           
## [29] "IgM.IGHM.AHS0198.pAbO"            "Tim3.HAVCR2.AHS0016.pAbO"
#all_genes <- rownames(seurat)

### Scale
seurat_filtered_Upper <- ScaleData(seurat_filtered_Upper)
### Run PCA with no of PC'S not specified, then check the Elbow plot. Re-run with the inflection number.
##seurat <- RunPCA(seurat, features = Abseqs)
#ElbowPlot(seurat)
### PCA
#seurat <- RunPCA(seurat, features = Abseqs)
seurat_filtered_Upper <- RunPCA(seurat_filtered_Upper, npcs = 8, features = Abseqs)
### Find Neighbours
#seurat <- FindNeighbors(seurat, dims = 1:8)
seurat_filtered_Upper <- FindNeighbors(seurat_filtered_Upper, dims = 1:8, k.param = 8)
### FindClusters
seurat_filtered_Upper <- FindClusters(seurat_filtered_Upper, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2731
## Number of edges: 51578
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8844
## Number of communities: 12
## Elapsed time: 0 seconds
### Run TSNE
seurat_filtered_Upper <- RunTSNE(seurat_filtered_Upper, features = Abseqs, check_duplicates = FALSE)
### Run UMAP
seurat_filtered_Upper <- RunUMAP(seurat_filtered_Upper, dims = 1:8)

## examine clustering results
print(seurat_filtered_Upper[["pca"]], dims = 1:5, nfeatures = 5)
## PC_ 1 
## Positive:  CD183.CXCR3.AHS0031.pAbO, CXCR6.CXCR6.AHS0148.pAbO, CD161.HP.3G10.KLRB1.AHS0205.pAbO, CD127.IL7R.AHS0028.pAbO, CD134.ACT35.TNFRSF4.AHS0013.pAbO 
## Negative:  HLA.DR.CD74.AHS0035.pAbO, CD45RA.HI100.PTPRC.AHS0009.pAbO, CXCR5.CXCR5.AHS0039.pAbO, CD19.SJ25C1.CD19.AHS0030.pAbO, CD4.SK3.CD4.AHS0032.pAbO 
## PC_ 2 
## Positive:  CD19.SJ25C1.CD19.AHS0030.pAbO, HLA.DR.CD74.AHS0035.pAbO, CXCR5.CXCR5.AHS0039.pAbO, CD272.BTLA.AHS0052.pAbO, IgD.IGHD.AHS0058.pAbO 
## Negative:  CD3.UCHT1.CD3E.AHS0231.pAbO, CD4.SK3.CD4.AHS0032.pAbO, CD28.L293.CD28.AHS0138.pAbO, CD27.M.T271.CD27.AHS0025.pAbO, CD62L.DREG.56.SELL.AHS0049.pAbO 
## PC_ 3 
## Positive:  CD27.M.T271.CD27.AHS0025.pAbO, CCR7.CCR7.AHS0273.pAbO, CD272.BTLA.AHS0052.pAbO, CD45RA.HI100.PTPRC.AHS0009.pAbO, CXCR5.CXCR5.AHS0039.pAbO 
## Negative:  CD11c.B.LY6.ITGAX.AHS0056.pAbO, CD14.MPHIP9.CD14.AHS0037.pAbO, Tim3.HAVCR2.AHS0016.pAbO, CD16.3G8.FCGR3A.AHS0053.pAbO, CD56.NCAM16.2.NCAM1.AHS0019.pAbO 
## PC_ 4 
## Positive:  CD4.SK3.CD4.AHS0032.pAbO, CD62L.DREG.56.SELL.AHS0049.pAbO, HLA.DR.CD74.AHS0035.pAbO, CD14.MPHIP9.CD14.AHS0037.pAbO, CD272.BTLA.AHS0052.pAbO 
## Negative:  CD8.SK1.CD8A.AHS0228.pAbO, CD45RA.HI100.PTPRC.AHS0009.pAbO, CD56.NCAM16.2.NCAM1.AHS0019.pAbO, CD137.TNFRSF9.AHS0003.pAbO, CD134.ACT35.TNFRSF4.AHS0013.pAbO 
## PC_ 5 
## Positive:  CD25.2A3.IL2RA.AHS0026.pAbO, CD279.EH12.1.PDCD1.AHS0014.pAbO, CD28.L293.CD28.AHS0138.pAbO, CD278.ICOS.AHS0012.pAbO, CD134.ACT35.TNFRSF4.AHS0013.pAbO 
## Negative:  CD62L.DREG.56.SELL.AHS0049.pAbO, CD196.CCR6.AHS0034.pAbO, CD45RA.HI100.PTPRC.AHS0009.pAbO, CD56.NCAM16.2.NCAM1.AHS0019.pAbO, CD16.3G8.FCGR3A.AHS0053.pAbO
VizDimLoadings(seurat_filtered_Upper, dims = 1, reduction = "pca")

VizDimLoadings(seurat_filtered_Upper, dims = 2, reduction = "pca")

seurat_filtered_Upper
## An object of class Seurat 
## 427 features across 2731 samples within 1 assay 
## Active assay: RNA (427 features, 0 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
seurat_filtered_Upper[["pca"]]
## A dimensional reduction object with key PC_ 
##  Number of dimensions: 8 
##  Projected dimensional reduction calculated:  FALSE 
##  Jackstraw run: FALSE 
##  Computed using assay: RNA
seurat_filtered_Upper[["tsne"]]
## A dimensional reduction object with key tSNE_ 
##  Number of dimensions: 2 
##  Projected dimensional reduction calculated:  FALSE 
##  Jackstraw run: FALSE 
##  Computed using assay: RNA
head(Loadings(seurat_filtered_Upper, reduction = "pca")[])
##                                        PC_1         PC_2        PC_3
## CCR7.CCR7.AHS0273.pAbO           0.19102216  0.066222876  0.25420696
## CD11c.B.LY6.ITGAX.AHS0056.pAbO   0.09815911  0.020731725 -0.43316587
## CD127.IL7R.AHS0028.pAbO          0.26044554 -0.066910459 -0.01701734
## CD134.ACT35.TNFRSF4.AHS0013.pAbO 0.25559411 -0.005473693  0.05703747
## CD137.TNFRSF9.AHS0003.pAbO       0.25239357  0.029649142  0.01379313
## CD14.MPHIP9.CD14.AHS0037.pAbO    0.10554366  0.001300037 -0.40040960
##                                         PC_4        PC_5       PC_6
## CCR7.CCR7.AHS0273.pAbO            0.10468877 -0.01524346  0.3243067
## CD11c.B.LY6.ITGAX.AHS0056.pAbO    0.12542809  0.13141776  0.2263514
## CD127.IL7R.AHS0028.pAbO           0.05611576 -0.08936625 -0.1326299
## CD134.ACT35.TNFRSF4.AHS0013.pAbO -0.06580390  0.23630450  0.2146629
## CD137.TNFRSF9.AHS0003.pAbO       -0.08591190  0.21612212  0.2367659
## CD14.MPHIP9.CD14.AHS0037.pAbO     0.18088448  0.08528087  0.2295177
##                                          PC_7        PC_8
## CCR7.CCR7.AHS0273.pAbO           -0.003214632  0.17991116
## CD11c.B.LY6.ITGAX.AHS0056.pAbO   -0.099616218  0.18425373
## CD127.IL7R.AHS0028.pAbO           0.120947613  0.05312593
## CD134.ACT35.TNFRSF4.AHS0013.pAbO -0.134175272  0.07153088
## CD137.TNFRSF9.AHS0003.pAbO       -0.132183452 -0.02523183
## CD14.MPHIP9.CD14.AHS0037.pAbO    -0.117342163  0.29907649
DimPlot(seurat_filtered_Upper, reduction = 'tsne') + ggtitle("tSNE Upper group")

DimPlot(seurat_filtered_Upper, reduction = 'umap') + ggtitle("UMAP Upper group")

Dotplots

##########################################################################
############################ Dotplots ####################################
##########################################################################

########## Dotplot

# RNA list
rna_list <- c('CD3E',
              'CD4',
              'CD8A',
              'MS4A1',
              'CD14',
              'FCGR3A',
              'ITGAM',
              'CD5',
              'FCER2',
              'IL2RA',
              'CD69',
              'CD22',
              'CD79B')


## seurat filtered
## Clusters Idents
DotPlot(seurat_filtered, features = rna_list) + RotatedAxis() + ggtitle("Dotplot - filtered all. RNA list")

## calls file Idents
DotPlot(seurat_filtered, features = Abseqs, group.by = 'Cell_Type_Experimental') + RotatedAxis() + ggtitle("Dotplot - filtered all. Abseqs")

## Lower
## Clusters Idents
DotPlot(seurat_filtered_lower, features = rna_list) + RotatedAxis() + ggtitle("Dotplot - Lower. RNA list")

## calls file Idents
DotPlot(seurat_filtered_lower, features = Abseqs, group.by = 'Cell_Type_Experimental') + RotatedAxis() + ggtitle("Dotplot - Lower. Abseqs")

## Upper
## Clusters Idents
DotPlot(seurat_filtered_Upper, features = rna_list) + RotatedAxis() + ggtitle("Dotplot - Upper. RNA list")

## calls file Idents
DotPlot(seurat_filtered_Upper, features = Abseqs, group.by = 'Cell_Type_Experimental') + RotatedAxis() + ggtitle("Dotplot - Upper. Abseqs")

seurat filtered all - Subset per SMK and LMO

##########################################################################
####################### Subset SMK & LMO's and plot ######################
##########################################################################

### P1
# cond 1
LMO_1_cells_P1 <- WhichCells(seurat_filtered, expression = Bar1 >= 1 & Sample_Name == 'P1')
# cond 2
LMO_2_cells_P1 <- WhichCells(seurat_filtered, expression = Bar2 >= 1 & Sample_Name == 'P1')


DimPlot(seurat_filtered, reduction = "tsne",
        cells.highlight = list(LMO_1_cells_P1, LMO_2_cells_P1),
        cols.highlight = c("darkblue", "darkorange"), cols = "grey", sizes.highlight = 0.9) +
    ggtitle("Patient 1 -- SMK2") + theme_bw() + theme(legend.position = 'top')

# 
# 
# ### P2 
# # cond 1
LMO_1_cells_P2 <- WhichCells(seurat_filtered, expression = Bar1 >= 1 & Sample_Name == 'P2')
# cond 2
LMO_2_cells_P2 <- WhichCells(seurat_filtered, expression = Bar2 >= 1 & Sample_Name == 'P2')

DimPlot(seurat_filtered, reduction = "tsne",
        cells.highlight = list(LMO_1_cells_P2, LMO_2_cells_P2),
        cols.highlight = c("darkblue", "darkorange"), cols = "grey", sizes.highlight = 0.9) +
    ggtitle("Patient 2 -- SMK3") + theme_bw() + theme(legend.position = 'top')

# 
# 
# 
# 
# ### P3 
# # cond 1
LMO_1_cells_P3 <- WhichCells(seurat_filtered, expression = Bar1 >= 1 & Sample_Name == 'P3')
# cond 2
LMO_2_cells_P3 <- WhichCells(seurat_filtered, expression = Bar2 >= 1 & Sample_Name == 'P3')

DimPlot(seurat_filtered, reduction = "tsne",
        cells.highlight = list(LMO_1_cells_P3, LMO_2_cells_P3),
        cols.highlight = c("darkblue", "darkorange"), cols = "grey", sizes.highlight = 0.9) +
    ggtitle("Patient 3 -- SMK4") + theme_bw() + theme(legend.position = 'top')

# 
# 
# 
# 
# ### P4 
# # cond 1
LMO_1_cells_P4 <- WhichCells(seurat_filtered, expression = Bar1 >= 1 & Sample_Name == 'P4')
# cond 2
LMO_2_cells_P4 <- WhichCells(seurat_filtered, expression = Bar2 >= 1 & Sample_Name == 'P4')

DimPlot(seurat_filtered, reduction = "tsne",
        cells.highlight = list(LMO_1_cells_P4, LMO_2_cells_P4),
        cols.highlight = c("darkblue", "darkorange"), cols = "grey", sizes.highlight = 0.9) +
    ggtitle("Patient 4 -- SMK5") + theme_bw() + theme(legend.position = 'top')

# 
# 
# ### P5 
# # cond 1
LMO_1_cells_P5 <- WhichCells(seurat_filtered, expression = Bar1 >= 1 & Sample_Name == 'P5')
# cond 2
LMO_2_cells_P5 <- WhichCells(seurat_filtered, expression = Bar2 >= 1 & Sample_Name == 'P5')

DimPlot(seurat_filtered, reduction = "tsne",
        cells.highlight = list(LMO_1_cells_P5, LMO_2_cells_P5),
        cols.highlight = c("darkblue", "darkorange"), cols = "grey", sizes.highlight = 0.9) +
    ggtitle("Patient 5 -- SMK6") + theme_bw() + theme(legend.position = 'top')

# by Calls file
DimPlot(seurat_filtered, reduction = 'tsne', group.by = 'Cell_Type_Experimental')

######################################################################################################
######################################################################################################

Lower - Subset per SMK and LMO

##########################################################################
####################### Subset SMK & LMO's and plot ######################
##########################################################################

### P1
# cond 1
LMO_1_cells_P1 <- WhichCells(seurat_filtered_lower, expression = Bar1 >= 1 & Sample_Name == 'P1')
# cond 2
LMO_2_cells_P1 <- WhichCells(seurat_filtered_lower, expression = Bar2 >= 1 & Sample_Name == 'P1')


DimPlot(seurat_filtered_lower, reduction = "tsne",
        cells.highlight = list(LMO_1_cells_P1, LMO_2_cells_P1),
        cols.highlight = c("darkblue", "darkorange"), cols = "grey", sizes.highlight = 0.9) +
    ggtitle("Patient 1 -- SMK2") + theme_bw() + theme(legend.position = 'top')

# 
# 
# ### P2 
# # cond 1
LMO_1_cells_P2 <- WhichCells(seurat_filtered_lower, expression = Bar1 >= 1 & Sample_Name == 'P2')
# cond 2
LMO_2_cells_P2 <- WhichCells(seurat_filtered_lower, expression = Bar2 >= 1 & Sample_Name == 'P2')

DimPlot(seurat_filtered_lower, reduction = "tsne",
        cells.highlight = list(LMO_1_cells_P2, LMO_2_cells_P2),
        cols.highlight = c("darkblue", "darkorange"), cols = "grey", sizes.highlight = 0.9) +
    ggtitle("Patient 2 -- SMK3") + theme_bw() + theme(legend.position = 'top')

# 
# 
# 
# 
# ### P3 
# # cond 1
LMO_1_cells_P3 <- WhichCells(seurat_filtered_lower, expression = Bar1 >= 1 & Sample_Name == 'P3')
# cond 2
LMO_2_cells_P3 <- WhichCells(seurat_filtered_lower, expression = Bar2 >= 1 & Sample_Name == 'P3')

DimPlot(seurat_filtered_lower, reduction = "tsne",
        cells.highlight = list(LMO_1_cells_P3, LMO_2_cells_P3),
        cols.highlight = c("darkblue", "darkorange"), cols = "grey", sizes.highlight = 0.9) +
    ggtitle("Patient 3 -- SMK4") + theme_bw() + theme(legend.position = 'top')

# 
# 
# 
# 
# ### P4 
# # cond 1
LMO_1_cells_P4 <- WhichCells(seurat_filtered_lower, expression = Bar1 >= 1 & Sample_Name == 'P4')
# cond 2
LMO_2_cells_P4 <- WhichCells(seurat_filtered_lower, expression = Bar2 >= 1 & Sample_Name == 'P4')

DimPlot(seurat_filtered_lower, reduction = "tsne",
        cells.highlight = list(LMO_1_cells_P4, LMO_2_cells_P4),
        cols.highlight = c("darkblue", "darkorange"), cols = "grey", sizes.highlight = 0.9) +
    ggtitle("Patient 4 -- SMK5") + theme_bw() + theme(legend.position = 'top')

# 
# 
# ### P5 
# # cond 1
LMO_1_cells_P5 <- WhichCells(seurat_filtered_lower, expression = Bar1 >= 1 & Sample_Name == 'P5')
# cond 2
LMO_2_cells_P5 <- WhichCells(seurat_filtered_lower, expression = Bar2 >= 1 & Sample_Name == 'P5')

DimPlot(seurat_filtered_lower, reduction = "tsne",
        cells.highlight = list(LMO_1_cells_P5, LMO_2_cells_P5),
        cols.highlight = c("darkblue", "darkorange"), cols = "grey", sizes.highlight = 0.9) +
    ggtitle("Patient 5 -- SMK6") + theme_bw() + theme(legend.position = 'top')

# by Calls file
DimPlot(seurat_filtered_lower, reduction = 'tsne', group.by = 'Cell_Type_Experimental')

######################################################################################################
######################################################################################################

Upper - Subset per SMK and LMO

##########################################################################
####################### Subset SMK & LMO's and plot ######################
##########################################################################

### P1
# cond 1
LMO_1_cells_P1 <- WhichCells(seurat_filtered_Upper, expression = Bar1 >= 1 & Sample_Name == 'P1')
# cond 2
LMO_2_cells_P1 <- WhichCells(seurat_filtered_Upper, expression = Bar2 >= 1 & Sample_Name == 'P1')


DimPlot(seurat_filtered_Upper, reduction = "tsne",
        cells.highlight = list(LMO_1_cells_P1, LMO_2_cells_P1),
        cols.highlight = c("darkblue", "darkorange"), cols = "grey", sizes.highlight = 0.9) +
    ggtitle("Patient 1 -- SMK2") + theme_bw() + theme(legend.position = 'top')

# 
# 
# ### P2 
# # cond 1
LMO_1_cells_P2 <- WhichCells(seurat_filtered_Upper, expression = Bar1 >= 1 & Sample_Name == 'P2')
# cond 2
LMO_2_cells_P2 <- WhichCells(seurat_filtered_Upper, expression = Bar2 >= 1 & Sample_Name == 'P2')

DimPlot(seurat_filtered_Upper, reduction = "tsne",
        cells.highlight = list(LMO_1_cells_P2, LMO_2_cells_P2),
        cols.highlight = c("darkblue", "darkorange"), cols = "grey", sizes.highlight = 0.9) +
    ggtitle("Patient 2 -- SMK3") + theme_bw() + theme(legend.position = 'top')

# 
# 
# 
# 
# ### P3 
# # cond 1
LMO_1_cells_P3 <- WhichCells(seurat_filtered_Upper, expression = Bar1 >= 1 & Sample_Name == 'P3')
# cond 2
LMO_2_cells_P3 <- WhichCells(seurat_filtered_Upper, expression = Bar2 >= 1 & Sample_Name == 'P3')

DimPlot(seurat_filtered_Upper, reduction = "tsne",
        cells.highlight = list(LMO_1_cells_P3, LMO_2_cells_P3),
        cols.highlight = c("darkblue", "darkorange"), cols = "grey", sizes.highlight = 0.9) +
    ggtitle("Patient 3 -- SMK4") + theme_bw() + theme(legend.position = 'top')

# 
# 
# 
# 
# ### P4 
# # cond 1
LMO_1_cells_P4 <- WhichCells(seurat_filtered_Upper, expression = Bar1 >= 1 & Sample_Name == 'P4')
# cond 2
LMO_2_cells_P4 <- WhichCells(seurat_filtered_Upper, expression = Bar2 >= 1 & Sample_Name == 'P4')

DimPlot(seurat_filtered_Upper, reduction = "tsne",
        cells.highlight = list(LMO_1_cells_P4, LMO_2_cells_P4),
        cols.highlight = c("darkblue", "darkorange"), cols = "grey", sizes.highlight = 0.9) +
    ggtitle("Patient 4 -- SMK5") + theme_bw() + theme(legend.position = 'top')

# 
# 
# ### P5 
# # cond 1
LMO_1_cells_P5 <- WhichCells(seurat_filtered_Upper, expression = Bar1 >= 1 & Sample_Name == 'P5')
# cond 2
LMO_2_cells_P5 <- WhichCells(seurat_filtered_Upper, expression = Bar2 >= 1 & Sample_Name == 'P5')

DimPlot(seurat_filtered_Upper, reduction = "tsne",
        cells.highlight = list(LMO_1_cells_P5, LMO_2_cells_P5),
        cols.highlight = c("darkblue", "darkorange"), cols = "grey", sizes.highlight = 0.9) +
    ggtitle("Patient 5 -- SMK6") + theme_bw() + theme(legend.position = 'top')

# by Calls file
DimPlot(seurat_filtered_Upper, reduction = 'tsne', group.by = 'Cell_Type_Experimental')

######################################################################################################
######################################################################################################

Doublet Finder

# seu filtered

# ## Pre-process Seurat object (standard) --------------------------------------------------------------------------------------
# #seu_kidney <- CreateSeuratObject(kidney.data)
# #seu_kidney <- NormalizeData(seu_kidney)
# seurat_filtered2 <- FindVariableFeatures(seurat_filtered, selection.method = "vst", nfeatures = 200)
# #seu_kidney <- ScaleData(seu_kidney)
# #seu_kidney <- RunPCA(seu_kidney)
# #seu_kidney <- RunUMAP(seu_kidney, dims = 1:10)
# 
# 
# ## Pre-process Seurat object (sctransform) -----------------------------------------------------------------------------------
# #seu_kidney <- CreateSeuratObject(kidney.data)
# seurat_filtered2 <- SCTransform(seurat_filtered2)
# #seu_kidney <- RunPCA(seu_kidney)
# #seu_kidney <- RunUMAP(seu_kidney, dims = 1:10)
# 
# 
# ## pK Identification (no ground-truth) ---------------------------------------------------------------------------------------
# sweep.res.list_filtered <- paramSweep_v3(seurat_filtered2, PCs = 1:8, sct = FALSE)
# sweep.stats_filtered <- summarizeSweep(sweep.res.list_filtered, GT = FALSE)
# bcmvn_filtered <- find.pK(sweep.stats_filtered)
# 
# 
# 
# ## pK Identification (ground-truth) ------------------------------------------------------------------------------------------
# sweep.res.list_filtered <- paramSweep_v3(seurat_filtered2, PCs = 1:8, sct = FALSE)
# gt.calls <- seurat_filtered2@meta.data[rownames(sweep.res.list_filtered[[1]]), "GT"].   ## GT is a vector containing "Singlet" and "Doublet" calls recorded using sample multiplexing classification and/or in silico geneotyping results 
# sweep.stats_filtered <- summarizeSweep(sweep.res.list_filtered, GT = TRUE, GT.calls = gt.calls)
# bcmvn_filtered <- find.pK(sweep.stats_filtered)
# 
# 
# ## Homotypic Doublet Proportion Estimate -------------------------------------------------------------------------------------
# homotypic.prop <- modelHomotypic(seurat_filtered2@meta.data$seurat_clusters)           ## ex: annotations <- seu_kidney@meta.data$ClusteringResults
# nExp_poi <- round(0.075*nrow(seurat_filtered2@meta.data))  ## Assuming 7.5% doublet formation rate - tailor for your dataset
# nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
# 
# ## Run DoubletFinder with varying classification stringencies ----------------------------------------------------------------
# seurat_filtered2 <- doubletFinder_v3(seurat_filtered2, PCs = 1:8, pN = 0.25, pK = 0.09, nExp = nExp_poi, reuse.pANN = FALSE, sct = FALSE)
# seurat_filtered2 <- doubletFinder_v3(seurat_filtered2, PCs = 1:8, pN = 0.25, pK = 0.09, nExp = nExp_poi.adj, reuse.pANN = "pANN_0.25_0.09_500", sct = FALSE)
# 
# 
# ## plot
# 
# seurat_filtered2 <- doubletFinder_v3(seurat_filtered2, PCs = 1:8, pN = 0.25, pK = 0.09, nExp = nExp_poi.adj, reuse.pANN = "DF.classifications_0.25_0.09_500", sct = FALSE)
# 
# seurat_filtered2@meta.data[,"CellTypes_DF"] <- seurat_filtered2@meta.data$CT.Park
# 
# seurat_filtered2@meta.data$CellTypes_DF[which(seurat_filtered2@meta.data$DF.classifications_0.25_0.09_500 == "Doublet")] <- "Doublet"
# 
# DimPlot(seurat_filtered2, group.by="CellTypes_DF", reduction="tsne", pt.size=0.5, order=c("Coll.Duct.TC","Doublet"), cols=c("#66C2A5","#FFD92F","#8DA0CB","#A6D854","#E78AC3","#B3B3B3","#E5C494","black","#FC8D62"))