1. Set up
library(biomaRt)
library(knitr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.0.6 ✓ dplyr 1.0.3
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x dplyr::select() masks biomaRt::select()
library(dplyr)
library(Seurat)
## Registered S3 method overwritten by 'spatstat':
## method from
## print.boxx cli
## Attaching SeuratObject
#remotes::install_github("rnabioco/clustifyrdata")
library(clustifyr)
library(RColorBrewer)
library(gprofiler2)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
library(ggpubr)
library(jpeg)
# opts_chunk$set(engine.path = '/usr/local/bin/python3')
# knitr::opts_chunk$set(cache = TRUE)
# # knitr::opts_chunk$set(echo = FALSE)
# knitr::opts_chunk$set(warning = FALSE)
# knitr::opts_chunk$set(message = FALSE)
# knitr::opts_chunk$set(cache.lazy = FALSE)
3. Initial check
# According to Austin, $umap is the reduction to use and it is in fact the harmony-based umap
# despite the "goofy" name, he said...
# Plot all adt markers to get an intial sense of the clusters
lscx %>%
SetIdent(., value = "type.tissue") %>%
subset(., idents = c("AML", "HTB")) %>%
FeaturePlot(.,
adt,
reduction = "umap",
min.cutoff = "q5",
max.cutoff = "q95",
pt.size = 0.1)
## Warning in FeaturePlot(., adt, reduction = "umap", min.cutoff = "q5", max.cutoff
## = "q95", : All cells have the same value (0) of adt_CD70.
## Warning in FeaturePlot(., adt, reduction = "umap", min.cutoff = "q5", max.cutoff
## = "q95", : All cells have the same value (0) of adt_CD27.

# DimPlot(lscx, reduction = "umap", split.by = "type.lsc") # Austin said always use reduction = "umap" to project the harmonized umap
# DimPlot(lscx, reduction = "umap", split.by = "type.pheno")
# DimPlot(lscx, reduction = "umap", split.by = "type.tissue")
# DimPlot(lscx, reduction = "umap", split.by = "type.disease")
4. Clustify
4.1. Trying different compute methods
# Download hema_microarray data from the DMAP study (Novershtern et al.Cell 2011)
dmap <- clustifyrdatahub::ref_hema_microarray()
## snapshotDate(): 2020-10-27
## snapshotDate(): 2020-10-27
## see ?clustifyrdatahub and browseVignettes('clustifyrdatahub') for documentation
## loading from cache
# clustify using cosine
res <- clustify(lscx,
ref_mat = dmap, #can also use "cbmc_ref"
cluster_col = "seurat_clusters",
#threshold = 0, #let clustify assign threshold
rename_prefix = "hema_microarray_cosine",
compute_method = "cosine", #spearman, pearson, kendall, cosine
verbose = T)
## using # of genes: 1223
## using threshold of 0.5
unique(res$hema_microarray_cosine_type)
## [1] "Hematopoietic stem cell_CD133+ CD34dim"
## [2] "Monocyte"
## [3] "Erythroid_CD34+ CD71+ GlyA-"
## [4] "Naïve B-cells"
## [5] "CD8+ Effector Memory"
## [6] "Erythroid_CD34- CD71+ GlyA+"
## [7] "unassigned"
# clustify using spearman
res <- clustify(lscx,
ref_mat = dmap, #can also use "cbmc_ref"
cluster_col = "seurat_clusters",
#threshold = 0, #let clustify assign threshold
rename_prefix = "hema_microarray_spearman",
compute_method = "spearman", #spearman, pearson, kendall, cosine
verbose = T)
## using # of genes: 1223
## using threshold of 0.54
unique(res$hema_microarray_spearman_type)
## [1] "Hematopoietic stem cell_CD133+ CD34dim"
## [2] "Monocyte"
## [3] "Naïve B-cells"
## [4] "CD8+ Effector Memory RA"
## [5] "Erythroid_CD34- CD71+ GlyA-"
## [6] "unassigned"
## [7] "CD4+ Central Memory"
# clustify using kendall
res <- clustify(lscx,
ref_mat = dmap, #can also use "cbmc_ref"
cluster_col = "seurat_clusters",
#threshold = 0, #let clustify assign threshold
rename_prefix = "hema_microarray_kendall",
compute_method = "kendall", #spearman, pearson, kendall, cosine
verbose = T)
## using # of genes: 1223
## using threshold of 0.39
unique(res$hema_microarray_kendall_type)
## [1] "Hematopoietic stem cell_CD133+ CD34dim"
## [2] "Monocyte"
## [3] "Naïve B-cells"
## [4] "CD8+ Effector Memory RA"
## [5] "Erythroid_CD34- CD71+ GlyA-"
## [6] "unassigned"
## [7] "CD4+ Central Memory"
# clustify using pearson
res <- clustify(lscx,
ref_mat = dmap, #can also use "cbmc_ref"
cluster_col = "seurat_clusters",
#threshold = 0.2, #let clustify assign threshold
rename_prefix = "hema_microarray_pearson",
compute_method = "pearson", #spearman, pearson, kendall, cosine
verbose = T)
## using # of genes: 1223
## using threshold of 0.52
unique(res$hema_microarray_pearson_type)
## [1] "Hematopoietic stem cell_CD133+ CD34dim"
## [2] "Myeloid Dendritic Cell"
## [3] "unassigned"
## [4] "Naïve B-cells"
## [5] "Monocyte"
## [6] "CD8+ Effector Memory"
## [7] "Erythroid_CD34- CD71lo GlyA+"
## [8] "Mature NK cell_CD56- CD16+ CD3-"
## [9] "Naive CD4+ T-cell"
# pearson seems to give us best assignments at auto-assigned threshold
4.2. Trying different threshold settings using the pearson method
# Set threshold to many values and test their influence on assignments
res <- clustify(lscx,
ref_mat = dmap, #can also use "cbmc_ref"
cluster_col = "seurat_clusters",
threshold = 0.1, # threshold settings 0.1-0.4 gave same best results
rename_prefix = "hema_microarray_pearson_0.1",
compute_method = "pearson", #spearman, pearson, kendall, cosine
verbose = T)
## using # of genes: 1223
# Check the assignments
res@meta.data %>%
as_tibble() %>% # drop the rown names
dplyr::select(c("sct_pca_snn_res.0.2", "seurat_clusters", "hema_microarray_pearson_0.1_type")) %>%
unique() %>%
arrange(seurat_clusters)
## # A tibble: 11 x 3
## sct_pca_snn_res.0.2 seurat_clusters hema_microarray_pearson_0.1_type
## <fct> <chr> <chr>
## 1 0 0 "Monocyte"
## 2 1 1 "Hematopoietic stem cell_CD133+ CD34dim"
## 3 10 10 "Naive CD4+ T-cell"
## 4 2 2 "Colony Forming Unit-Monocyte "
## 5 3 3 "CD8+ Effector Memory"
## 6 4 4 "Myeloid Dendritic Cell"
## 7 5 5 "Erythroid_CD34+ CD71+ GlyA-"
## 8 6 6 "Naïve B-cells"
## 9 7 7 "Erythroid_CD34- CD71lo GlyA+"
## 10 8 8 "Mature NK cell_CD56- CD16+ CD3-"
## 11 9 9 "Hematopoietic stem cell_CD133+ CD34dim"
# Display clusters
DimPlot(res, reduction = "umap",
group.by = "hema_microarray_pearson_0.1_type")

4.3. Simplify cluster names
current.cluster.names <- unique(res$hema_microarray_pearson_0.1_type)
new.cluster.names <- c("Primitive", #"Hematopoietic stem cell_CD133+ CD34dim",
"Myeloid Dendritic Cell",# "Myeloid Dendritic Cell",
"Erythroid (CD34+,CD71+,GlyA-)", # "Erythroid_CD34+ CD71+ GlyA-",
"CFU-Mono",#"Colony Forming Unit-Monocyte ",
"B-cells", # "Naïve B-cells",
"Monocytic", #"Monocyte"
"CD8+ T-cells",# "CD8+ Effector Memory",
"Erythroid (CD34-,CD71lo,GlyA+)", # "Erythroid_CD34- CD71lo GlyA+",
"NK-cells",# "Mature NK cell_CD56- CD16+ CD3-"
"CD4+ T-cells")# "Naive CD4+ T-cell",
res$Clusters <- plyr::mapvalues(x=res$hema_microarray_pearson_0.1_type,
from = current.cluster.names,
to = new.cluster.names)# Check the assignments
res@meta.data %>%
as_tibble() %>% # drop the rown names
dplyr::select(c("seurat_clusters", "hema_microarray_pearson_0.1_type", "Clusters")) %>%
unique() %>%
arrange(seurat_clusters)
## # A tibble: 11 x 3
## seurat_clusters hema_microarray_pearson_0.1_type Clusters
## <chr> <chr> <chr>
## 1 0 "Monocyte" Monocytic
## 2 1 "Hematopoietic stem cell_CD133+ CD… Primitive
## 3 10 "Naive CD4+ T-cell" CD4+ T-cells
## 4 2 "Colony Forming Unit-Monocyte " CFU-Mono
## 5 3 "CD8+ Effector Memory" CD8+ T-cells
## 6 4 "Myeloid Dendritic Cell" Myeloid Dendritic Cell
## 7 5 "Erythroid_CD34+ CD71+ GlyA-" Erythroid (CD34+,CD71+,G…
## 8 6 "Naïve B-cells" B-cells
## 9 7 "Erythroid_CD34- CD71lo GlyA+" Erythroid (CD34-,CD71lo,…
## 10 8 "Mature NK cell_CD56- CD16+ CD3-" NK-cells
## 11 9 "Hematopoietic stem cell_CD133+ CD… Primitive
# Display clusters
DimPlot(res, reduction = "umap",
group.by = "Clusters")

4.4. Assgin new color schemes for the clusters
level.lscx <- c("Monocytic",
"CFU-Mono",
"Myeloid Dendritic Cell",
"Primitive",
"Erythroid (CD34+,CD71+,GlyA-)",
"Erythroid (CD34-,CD71lo,GlyA+)",
"B-cells",
"CD4+ T-cells",
"CD8+ T-cells",
"NK-cells"
)
res$Clusters <- factor(res$Clusters, levels = level.lscx)
palette.lscx <- c( "#b30024", #"Monocyte",
"#e31a1c",#"Colony Forming Unit-Monocyte ",
"#ff7f00", #"Myeloid Dendritic Cell",
"#08519c",#"Hematopoietic stem cell_CD133+ CD34dim",
"#762a83", #"Erythroid_CD34+ CD71+ GlyA-"
"#f28ac6", #"Erythroid_CD34- CD71lo GlyA+ "
"#20b2aa", #"Naïve B-cells",
"#1cad15", # "Naive CD4+ T-cell",
"#b2df8a", #"CD8+ Effector Memory",
"#d8db02") #"Mature NK cell_CD56- CD16+ CD3-"
show_col(palette.lscx)

DimPlot(res, reduction = "umap",
group.by = "Clusters",
#split.by = "type.lsc",
cols = palette.lscx,
label = F)

5. Make Fig. 6
Fig. 6a. overview of specimens in a table
res@meta.data %>%
as_tibble() %>%
dplyr::select(htb, type.lsc, type.pheno, type.tissue, type.disease) %>%
unique() %>%
DT::datatable()
Fig. 6b. umap of all subgroups
res$type.lsc <- factor(res$type.lsc, levels = c("Normal", "Type-A", "Type-B", "Type-B-R", "unknown"))
res$type.disease <- factor(res$type.disease, levels = c("Normal", "Dx", "Rl-chemo", "Rl-ven", "unknown"))
resr <- res %>%
SetIdent(., value = "type.lsc") %>%
subset(., idents = c("Normal","Type-A", "Type-B", "Type-B-R"))
resr %>%
DimPlot(., reduction = "umap",
group.by = "Clusters",
split.by = "type.lsc",
cols = palette.lscx,
label = F)

ggplot(resr@meta.data, aes(x = type.lsc, fill = Clusters)) +
geom_bar(position = "fill",stat = "count") +
scale_y_continuous(labels = scales::percent_format()) +
cowplot::theme_cowplot() +
scale_color_manual(values = palette.lscx, aesthetics = c("colour", "fill"))

Fig. 6c. RNA velocity analysis
Fig. 6d. gprofiler analysis
Fig. 6e. Hypergeometric analysis of LSC signatures
Fig. 6f. Paired dx and rl specimens from ven/aza
res.1261 <- res %>%
SetIdent(., value = "htb") %>%
subset(., idents = c("1261"))
res.1467 <- res %>%
SetIdent(., value = "htb") %>%
subset(., idents = c("1467"))
p1 <- res.1261 %>%
FeaturePlot(.,
"HOXA9",
reduction = "umap",
split.by = "type.disease")
p2 <- res.1467 %>%
FeaturePlot(.,
"HOXA9",
reduction = "umap",
split.by = "type.disease")
p <- ggarrange(p1, p2, ncol = 1)
annotate_figure(p,
left = "HTB-1467 HTB-1261")
