1. Set up
library(biomaRt)
library(knitr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.0.6 ✓ dplyr 1.0.4
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x dplyr::select() masks biomaRt::select()
library(dplyr)
library(Seurat)
## Registered S3 method overwritten by 'spatstat':
## method from
## print.boxx cli
## Attaching SeuratObject
#remotes::install_github("rnabioco/clustifyrdata")
library(clustifyr)
library(RColorBrewer)
library(gprofiler2)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
library(ggpubr)
library(jpeg)
# #Install monocle3
# First in terminal do: brew install --build-from-source gcc
# BiocManager::install(c('BiocGenerics', 'DelayedArray', 'DelayedMatrixStats',
# 'limma', 'S4Vectors', 'SingleCellExperiment',
# 'SummarizedExperiment', 'batchelor', 'Matrix.utils'))
# devtools::install_github('cole-trapnell-lab/leidenbase')
# devtools::install_github('cole-trapnell-lab/monocle3')
# remotes::install_github('satijalab/seurat-wrappers')
library(leidenbase)
library(monocle3)
## Loading required package: Biobase
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: SingleCellExperiment
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
## The following object is masked from 'package:dplyr':
##
## count
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## The following object is masked from 'package:Biobase':
##
## rowMedians
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:purrr':
##
## reduce
## Loading required package: GenomeInfoDb
##
## Attaching package: 'SummarizedExperiment'
## The following object is masked from 'package:SeuratObject':
##
## Assays
## The following object is masked from 'package:Seurat':
##
## Assays
##
## Attaching package: 'monocle3'
## The following objects are masked from 'package:Biobase':
##
## exprs, fData, fData<-, pData, pData<-
library(SeuratWrappers)
4. Clustify
4.1. Trying different compute methods
# Download hema_microarray data from the DMAP study (Novershtern et al.Cell 2011)
dmap <- clustifyrdatahub::ref_hema_microarray()
## snapshotDate(): 2020-10-27
## snapshotDate(): 2020-10-27
## see ?clustifyrdatahub and browseVignettes('clustifyrdatahub') for documentation
## loading from cache
# clustify using cosine
res <- clustify(lscx,
ref_mat = dmap, #can also use "cbmc_ref"
cluster_col = "seurat_clusters",
#threshold = 0, #let clustify assign threshold
rename_prefix = "hema_microarray_cosine",
compute_method = "cosine", #spearman, pearson, kendall, cosine
verbose = T)
## using # of genes: 1223
## using threshold of 0.5
unique(res$hema_microarray_cosine_type)
## [1] "Hematopoietic stem cell_CD133+ CD34dim"
## [2] "Monocyte"
## [3] "Erythroid_CD34+ CD71+ GlyA-"
## [4] "Naïve B-cells"
## [5] "CD8+ Effector Memory"
## [6] "Erythroid_CD34- CD71+ GlyA+"
## [7] "unassigned"
# clustify using spearman
res <- clustify(lscx,
ref_mat = dmap, #can also use "cbmc_ref"
cluster_col = "seurat_clusters",
#threshold = 0, #let clustify assign threshold
rename_prefix = "hema_microarray_spearman",
compute_method = "spearman", #spearman, pearson, kendall, cosine
verbose = T)
## using # of genes: 1223
## using threshold of 0.54
unique(res$hema_microarray_spearman_type)
## [1] "Hematopoietic stem cell_CD133+ CD34dim"
## [2] "Monocyte"
## [3] "Naïve B-cells"
## [4] "CD8+ Effector Memory RA"
## [5] "Erythroid_CD34- CD71+ GlyA-"
## [6] "unassigned"
## [7] "CD4+ Central Memory"
# clustify using kendall
res <- clustify(lscx,
ref_mat = dmap, #can also use "cbmc_ref"
cluster_col = "seurat_clusters",
#threshold = 0, #let clustify assign threshold
rename_prefix = "hema_microarray_kendall",
compute_method = "kendall", #spearman, pearson, kendall, cosine
verbose = T)
## using # of genes: 1223
## using threshold of 0.39
unique(res$hema_microarray_kendall_type)
## [1] "Hematopoietic stem cell_CD133+ CD34dim"
## [2] "Monocyte"
## [3] "Naïve B-cells"
## [4] "CD8+ Effector Memory RA"
## [5] "Erythroid_CD34- CD71+ GlyA-"
## [6] "unassigned"
## [7] "CD4+ Central Memory"
# clustify using pearson
res <- clustify(lscx,
ref_mat = dmap, #can also use "cbmc_ref"
cluster_col = "seurat_clusters",
#threshold = 0.2, #let clustify assign threshold
rename_prefix = "hema_microarray_pearson",
compute_method = "pearson", #spearman, pearson, kendall, cosine
verbose = T)
## using # of genes: 1223
## using threshold of 0.52
unique(res$hema_microarray_pearson_type)
## [1] "Hematopoietic stem cell_CD133+ CD34dim"
## [2] "Myeloid Dendritic Cell"
## [3] "unassigned"
## [4] "Naïve B-cells"
## [5] "Monocyte"
## [6] "CD8+ Effector Memory"
## [7] "Erythroid_CD34- CD71lo GlyA+"
## [8] "Mature NK cell_CD56- CD16+ CD3-"
## [9] "Naive CD4+ T-cell"
# pearson seems to give us best assignments at auto-assigned threshold
4.2. Trying different threshold settings using the pearson method
# Set threshold to many values and test their influence on assignments
res <- clustify(lscx,
ref_mat = dmap, #can also use "cbmc_ref"
cluster_col = "seurat_clusters",
threshold = 0.1, # threshold settings 0.1-0.4 gave same best results
rename_prefix = "hema_microarray_pearson_0.1",
compute_method = "pearson", #spearman, pearson, kendall, cosine
verbose = T)
## using # of genes: 1223
# Check the assignments
res@meta.data %>%
as_tibble() %>% # drop the rown names
dplyr::select(c("sct_pca_snn_res.0.2", "seurat_clusters", "hema_microarray_pearson_0.1_type")) %>%
unique() %>%
arrange(seurat_clusters)
## # A tibble: 11 x 3
## sct_pca_snn_res.0.2 seurat_clusters hema_microarray_pearson_0.1_type
## <fct> <chr> <chr>
## 1 0 0 "Monocyte"
## 2 1 1 "Hematopoietic stem cell_CD133+ CD34dim"
## 3 10 10 "Naive CD4+ T-cell"
## 4 2 2 "Colony Forming Unit-Monocyte "
## 5 3 3 "CD8+ Effector Memory"
## 6 4 4 "Myeloid Dendritic Cell"
## 7 5 5 "Erythroid_CD34+ CD71+ GlyA-"
## 8 6 6 "Naïve B-cells"
## 9 7 7 "Erythroid_CD34- CD71lo GlyA+"
## 10 8 8 "Mature NK cell_CD56- CD16+ CD3-"
## 11 9 9 "Hematopoietic stem cell_CD133+ CD34dim"
# Display clusters
DimPlot(res, reduction = "umap",
group.by = "hema_microarray_pearson_0.1_type")

4.3. Simplify cluster names
current.cluster.names <- unique(res$hema_microarray_pearson_0.1_type)
# Full names
new.cluster.names <- c("Primitive", #"Hematopoietic stem cell_CD133+ CD34dim",
"Myeloid Dendritic Cell",# "Myeloid Dendritic Cell",
"Erythroid (CD34+,CD71+,GlyA-)", # "Erythroid_CD34+ CD71+ GlyA-",
"CFU-Mono",#"Colony Forming Unit-Monocyte ",
"B-cells", # "Naïve B-cells",
"Monocytic", #"Monocyte"
"CD8+ T-cells",# "CD8+ Effector Memory",
"Erythroid (CD34-,CD71lo,GlyA+)", # "Erythroid_CD34- CD71lo GlyA+",
"NK-cells",# "Mature NK cell_CD56- CD16+ CD3-"
"CD4+ T-cells")# "Naive CD4+ T-cell",
# Short names
short.cluster.names <- c("Prim (CD34-dim)", #"Hematopoietic stem cell_CD133+ CD34dim",
"MDC",# "Myeloid Dendritic Cell",
"Prim (CD34-high)", # "Erythroid_CD34+ CD71+ GlyA-",
"CFU-Mono",#"Colony Forming Unit-Monocyte ",
"B", # "Naïve B-cells",
"Mono", #"Monocyte"
"CD8-T",# "CD8+ Effector Memory",
"Ery", # "Erythroid_CD34- CD71lo GlyA+",
"NK?",# "Mature NK cell_CD56- CD16+ CD3-"
"CD4-T")# "Naive CD4+ T-cell",
res$Clusters <- plyr::mapvalues(x=res$hema_microarray_pearson_0.1_type,
from = current.cluster.names,
to = new.cluster.names)# Check the assignments
res$Subpopulations <- plyr::mapvalues(x=res$hema_microarray_pearson_0.1_type,
from = current.cluster.names,
to = short.cluster.names)# Check the assignments
res@meta.data %>%
as_tibble() %>% # drop the rown names
dplyr::select(c("seurat_clusters", "hema_microarray_pearson_0.1_type", "Clusters", "Subpopulations")) %>%
unique() %>%
arrange(seurat_clusters)
## # A tibble: 11 x 4
## seurat_clusters hema_microarray_pearson_0.… Clusters Subpopulations
## <chr> <chr> <chr> <chr>
## 1 0 "Monocyte" Monocytic Mono
## 2 1 "Hematopoietic stem cell_C… Primitive Prim (CD34-di…
## 3 10 "Naive CD4+ T-cell" CD4+ T-cells CD4-T
## 4 2 "Colony Forming Unit-Monoc… CFU-Mono CFU-Mono
## 5 3 "CD8+ Effector Memory" CD8+ T-cells CD8-T
## 6 4 "Myeloid Dendritic Cell" Myeloid Dendritic… MDC
## 7 5 "Erythroid_CD34+ CD71+ Gly… Erythroid (CD34+,… Prim (CD34-hi…
## 8 6 "Naïve B-cells" B-cells B
## 9 7 "Erythroid_CD34- CD71lo Gl… Erythroid (CD34-,… Ery
## 10 8 "Mature NK cell_CD56- CD16… NK-cells NK?
## 11 9 "Hematopoietic stem cell_C… Primitive Prim (CD34-di…
# Display clusters
DimPlot(res, reduction = "umap",
group.by = "Subpopulations")

4.4. Assgin new color schemes for the clusters
# Full names
level.clusters <- c("Monocytic",
"CFU-Mono",
"Myeloid Dendritic Cell",
"Primitive",
"Erythroid (CD34+,CD71+,GlyA-)",
"Erythroid (CD34-,CD71lo,GlyA+)",
"B-cells",
"CD4+ T-cells",
"CD8+ T-cells",
"NK-cells"
)
# short names
level.Subpopulations <- c("Mono",
"CFU-Mono",
"MDC",
"Prim (CD34-dim)",
"Prim (CD34-high)",
"Ery",
"B",
"CD4-T",
"CD8-T",
"NK?"
)
res$Clusters <- factor(res$Clusters, levels = level.clusters)
res$Subpopulations <- factor(res$Subpopulations, levels = level.Subpopulations)
palette.lscx <- c( "#b30024", #"Monocyte",
"#fa1e20",#"Colony Forming Unit-Monocyte ",
"#ff7f00", #"Myeloid Dendritic Cell",
"#0b47d4",#"Hematopoietic stem cell_CD133+ CD34dim",
"#5a95d1", #"Erythroid_CD34+ CD71+ GlyA-"
"#f28ac6", #"Erythroid_CD34- CD71lo GlyA+ "
"#20b2aa", #"Naïve B-cells",
"#1cad15", # "Naive CD4+ T-cell",
"#b2df8a", #"CD8+ Effector Memory",
"#d8db02") #"Mature NK cell_CD56- CD16+ CD3-"
show_col(palette.lscx)

DimPlot(res, reduction = "umap",
group.by = "Subpopulations",
#split.by = "type.lsc",
cols = palette.lscx,
label = T)

VlnPlot(res, "CD34", group.by = "Subpopulations", cols = palette.lscx) # Primitive marker

VlnPlot(res, "KIT", group.by = "Subpopulations", cols = palette.lscx) # Primitive marker

VlnPlot(res, "GYPA", group.by = "Subpopulations", cols = palette.lscx) # Erythroid marker Glycophorin A

VlnPlot(res, "adt_CD4", group.by = "Subpopulations", cols = palette.lscx) # T cell marker CD4

VlnPlot(res, "CD8A", group.by = "Subpopulations", cols = palette.lscx) # T cell marker CD8

VlnPlot(res, "adt_CD19", group.by = "Subpopulations", cols = palette.lscx) # B cell marker CD19

VlnPlot(res, "NLRP3", group.by = "Subpopulations", cols = palette.lscx)

res@meta.data %>%
as_tibble() %>%
dplyr::select(htb, type.lsc, type.pheno, type.tissue, type.disease) %>%
unique() %>%
DT::datatable()
saveRDS(res, "~/Documents/Lab_data/P_LSCx/LSCx-CITEseq/ssp_analysis/SSP_LSCx_version1_downsampled_20210125_harmonized_clustered_res.Rds")