1. Set up

library(biomaRt)
library(knitr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.0.6     ✓ dplyr   1.0.3
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x dplyr::select() masks biomaRt::select()
library(dplyr)
library(Seurat)
## Registered S3 method overwritten by 'spatstat':
##   method     from
##   print.boxx cli
## Attaching SeuratObject
#remotes::install_github("rnabioco/clustifyrdata")
library(clustifyr)
library(RColorBrewer)
library(gprofiler2)
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
library(ggpubr)
library(jpeg)

# opts_chunk$set(engine.path = '/usr/local/bin/python3')
# knitr::opts_chunk$set(cache = TRUE)
# # knitr::opts_chunk$set(echo = FALSE)
# knitr::opts_chunk$set(warning = FALSE)
# knitr::opts_chunk$set(message = FALSE)
# knitr::opts_chunk$set(cache.lazy = FALSE)

2. Input Seurat object

# Read in pre-processed and harmonlized object by Austin
lscx <- readRDS("~/Documents/Lab_data/P_LSCx/LSCx-CITEseq/from_Austin/LSCx_version1_harmonized_clustered_downsampled.rds") # to make sure all of us are starting on same object.

# change the orig.ident to a cool name
lscx@meta.data$orig.ident <- "lscx"

# Read in csv file storing new meta.data features needed for fig.6
meta.suppl <- read.csv("~/Documents/Lab_data/P_LSCx/LSCx-CITEseq/ssp_analysis/lscx_meta_clinic_supple_v1.csv")

# Add in new features into the meta.data file
cell.id <- rownames(lscx@meta.data) #store cell id
lscx@meta.data <- left_join(lscx@meta.data, meta.suppl, by = "sample") # left_join new features into meta.data table
rownames(lscx@meta.data) <- cell.id # add back cell id

# Get some useful variables
type.lsc <- unique(lscx$type.lsc)
type.pheno <- unique(lscx$type.pheno)
type.tissue <- unique(lscx$type.tissue)
type.disease <- unique(lscx$type.disease)
sample <- unique(lscx$sample)

# get adt
adt <- purrr::map_chr(rownames(lscx@assays$ADT@counts), ~ paste0("adt_", .)) # add "adt_" in front of all antigens. 

# Check new features added
type.lsc
## [1] "unknown"  "Type-A"   "Type-B"   "Type-B-R" "Normal"
type.pheno
## [1] "Prim"    "Mono"    "unknown" "MMP"     "Normal"
type.tissue
## [1] "AML" "HTB" "NBM"
type.disease
## [1] "Rl-chemo" "Dx"       "unknown"  "Rl-ven"   "Normal"
#saveRDS(lscx, "~/Documents/Lab_data/P_LSCx/LSCx-CITEseq/ssp_analysis/LSCx_version1_downsampled_20210125_harmonized_clustered_ssp.Rds")

3. Initial check

# According to Austin, $umap is the reduction to use and it is in fact the harmony-based umap
# despite the "goofy" name, he said...

# Plot all adt markers to get an intial sense of the clusters
lscx %>%
  SetIdent(., value = "type.tissue") %>%
  subset(., idents = c("AML", "HTB")) %>%
  FeaturePlot(., 
              adt,
              reduction = "umap",
              min.cutoff = "q5", 
              max.cutoff = "q95",
              pt.size = 0.1)
## Warning in FeaturePlot(., adt, reduction = "umap", min.cutoff = "q5", max.cutoff
## = "q95", : All cells have the same value (0) of adt_CD70.
## Warning in FeaturePlot(., adt, reduction = "umap", min.cutoff = "q5", max.cutoff
## = "q95", : All cells have the same value (0) of adt_CD27.

# DimPlot(lscx, reduction = "umap", split.by = "type.lsc") # Austin said always use reduction = "umap" to project the harmonized umap
# DimPlot(lscx, reduction = "umap", split.by = "type.pheno")
# DimPlot(lscx, reduction = "umap", split.by = "type.tissue")
# DimPlot(lscx, reduction = "umap", split.by = "type.disease")

4. Clustify

4.1. Trying different compute methods

# Download hema_microarray data from the DMAP study (Novershtern et al.Cell 2011)
dmap <- clustifyrdatahub::ref_hema_microarray() 
## snapshotDate(): 2020-10-27
## snapshotDate(): 2020-10-27
## see ?clustifyrdatahub and browseVignettes('clustifyrdatahub') for documentation
## loading from cache
# clustify using cosine
res <- clustify(lscx, 
                ref_mat = dmap, #can also use "cbmc_ref"
                cluster_col = "seurat_clusters",
                #threshold = 0, #let clustify assign threshold 
                rename_prefix = "hema_microarray_cosine",
                compute_method = "cosine", #spearman, pearson, kendall, cosine
                verbose = T)
## using # of genes: 1223
## using threshold of 0.5
unique(res$hema_microarray_cosine_type)
## [1] "Hematopoietic stem cell_CD133+ CD34dim"
## [2] "Monocyte"                              
## [3] "Erythroid_CD34+ CD71+ GlyA-"           
## [4] "Naïve B-cells"                         
## [5] "CD8+ Effector Memory"                  
## [6] "Erythroid_CD34- CD71+ GlyA+"           
## [7] "unassigned"
# clustify using spearman
res <- clustify(lscx, 
                ref_mat = dmap, #can also use "cbmc_ref"
                cluster_col = "seurat_clusters",
                #threshold = 0, #let clustify assign threshold 
                rename_prefix = "hema_microarray_spearman",
                compute_method = "spearman", #spearman, pearson, kendall, cosine
                verbose = T)
## using # of genes: 1223
## using threshold of 0.54
unique(res$hema_microarray_spearman_type)
## [1] "Hematopoietic stem cell_CD133+ CD34dim"
## [2] "Monocyte"                              
## [3] "Naïve B-cells"                         
## [4] "CD8+ Effector Memory RA"               
## [5] "Erythroid_CD34- CD71+ GlyA-"           
## [6] "unassigned"                            
## [7] "CD4+ Central Memory"
# clustify using kendall
res <- clustify(lscx, 
                ref_mat = dmap, #can also use "cbmc_ref"
                cluster_col = "seurat_clusters",
                #threshold = 0, #let clustify assign threshold 
                rename_prefix = "hema_microarray_kendall",
                compute_method = "kendall", #spearman, pearson, kendall, cosine
                verbose = T)
## using # of genes: 1223
## using threshold of 0.39
unique(res$hema_microarray_kendall_type)
## [1] "Hematopoietic stem cell_CD133+ CD34dim"
## [2] "Monocyte"                              
## [3] "Naïve B-cells"                         
## [4] "CD8+ Effector Memory RA"               
## [5] "Erythroid_CD34- CD71+ GlyA-"           
## [6] "unassigned"                            
## [7] "CD4+ Central Memory"
# clustify using pearson
res <- clustify(lscx, 
                ref_mat = dmap, #can also use "cbmc_ref"
                cluster_col = "seurat_clusters",
                #threshold = 0.2, #let clustify assign threshold 
                rename_prefix = "hema_microarray_pearson",
                compute_method = "pearson", #spearman, pearson, kendall, cosine
                verbose = T)
## using # of genes: 1223
## using threshold of 0.52
unique(res$hema_microarray_pearson_type) 
## [1] "Hematopoietic stem cell_CD133+ CD34dim"
## [2] "Myeloid Dendritic Cell"                
## [3] "unassigned"                            
## [4] "Naïve B-cells"                         
## [5] "Monocyte"                              
## [6] "CD8+ Effector Memory"                  
## [7] "Erythroid_CD34- CD71lo GlyA+"          
## [8] "Mature NK cell_CD56- CD16+ CD3-"       
## [9] "Naive CD4+ T-cell"
# pearson seems to give us best assignments at auto-assigned threshold

4.2. Trying different threshold settings using the pearson method

# Set threshold to many values and test their influence on assignments
res <- clustify(lscx, 
                ref_mat = dmap, #can also use "cbmc_ref"
                cluster_col = "seurat_clusters",
                threshold = 0.1, # threshold settings 0.1-0.4 gave same best results
                rename_prefix = "hema_microarray_pearson_0.1",
                compute_method = "pearson", #spearman, pearson, kendall, cosine
                verbose = T)
## using # of genes: 1223
# Check the assignments
res@meta.data %>%
  as_tibble() %>% # drop the rown names
  dplyr::select(c("sct_pca_snn_res.0.2", "seurat_clusters", "hema_microarray_pearson_0.1_type")) %>%
  unique() %>%
  arrange(seurat_clusters)
## # A tibble: 11 x 3
##    sct_pca_snn_res.0.2 seurat_clusters hema_microarray_pearson_0.1_type        
##    <fct>               <chr>           <chr>                                   
##  1 0                   0               "Monocyte"                              
##  2 1                   1               "Hematopoietic stem cell_CD133+ CD34dim"
##  3 10                  10              "Naive CD4+ T-cell"                     
##  4 2                   2               "Colony Forming Unit-Monocyte "         
##  5 3                   3               "CD8+ Effector Memory"                  
##  6 4                   4               "Myeloid Dendritic Cell"                
##  7 5                   5               "Erythroid_CD34+ CD71+ GlyA-"           
##  8 6                   6               "Naïve B-cells"                         
##  9 7                   7               "Erythroid_CD34- CD71lo GlyA+"          
## 10 8                   8               "Mature NK cell_CD56- CD16+ CD3-"       
## 11 9                   9               "Hematopoietic stem cell_CD133+ CD34dim"
# Display clusters 
DimPlot(res, reduction = "umap", 
        group.by = "hema_microarray_pearson_0.1_type")

4.3. Simplify cluster names

current.cluster.names <- unique(res$hema_microarray_pearson_0.1_type)

new.cluster.names <- c("Primitive", #"Hematopoietic stem cell_CD133+ CD34dim",
                       "Myeloid Dendritic Cell",# "Myeloid Dendritic Cell",
                       "Erythroid (CD34+,CD71+,GlyA-)", # "Erythroid_CD34+ CD71+ GlyA-",
                       "CFU-Mono",#"Colony Forming Unit-Monocyte ",
                       "B-cells", # "Naïve B-cells",
                       "Monocytic", #"Monocyte"
                       "CD8+ T-cells",# "CD8+ Effector Memory",
                       "Erythroid (CD34-,CD71lo,GlyA+)", # "Erythroid_CD34- CD71lo GlyA+",
                       "NK-cells",# "Mature NK cell_CD56- CD16+ CD3-"
                        "CD4+ T-cells")# "Naive CD4+ T-cell",

res$Clusters <- plyr::mapvalues(x=res$hema_microarray_pearson_0.1_type,
                                                        from = current.cluster.names,
                                                        to = new.cluster.names)# Check the assignments
res@meta.data %>%
  as_tibble() %>% # drop the rown names
  dplyr::select(c("seurat_clusters", "hema_microarray_pearson_0.1_type", "Clusters")) %>%
  unique() %>%
  arrange(seurat_clusters)
## # A tibble: 11 x 3
##    seurat_clusters hema_microarray_pearson_0.1_type    Clusters                 
##    <chr>           <chr>                               <chr>                    
##  1 0               "Monocyte"                          Monocytic                
##  2 1               "Hematopoietic stem cell_CD133+ CD… Primitive                
##  3 10              "Naive CD4+ T-cell"                 CD4+ T-cells             
##  4 2               "Colony Forming Unit-Monocyte "     CFU-Mono                 
##  5 3               "CD8+ Effector Memory"              CD8+ T-cells             
##  6 4               "Myeloid Dendritic Cell"            Myeloid Dendritic Cell   
##  7 5               "Erythroid_CD34+ CD71+ GlyA-"       Erythroid (CD34+,CD71+,G…
##  8 6               "Naïve B-cells"                     B-cells                  
##  9 7               "Erythroid_CD34- CD71lo GlyA+"      Erythroid (CD34-,CD71lo,…
## 10 8               "Mature NK cell_CD56- CD16+ CD3-"   NK-cells                 
## 11 9               "Hematopoietic stem cell_CD133+ CD… Primitive
# Display clusters 
DimPlot(res, reduction = "umap", 
        group.by = "Clusters")

4.4. Assgin new color schemes for the clusters

level.lscx <- c("Monocytic",
                "CFU-Mono",
                "Myeloid Dendritic Cell",
                "Primitive",
                "Erythroid (CD34+,CD71+,GlyA-)",
                "Erythroid (CD34-,CD71lo,GlyA+)",
                "B-cells",
                "CD4+ T-cells",
                "CD8+ T-cells",
                "NK-cells"
                )

res$Clusters <- factor(res$Clusters, levels = level.lscx)

palette.lscx <- c( "#b30024", #"Monocyte",
                  "#e31a1c",#"Colony Forming Unit-Monocyte ",
                   "#ff7f00", #"Myeloid Dendritic Cell",
                   "#08519c",#"Hematopoietic stem cell_CD133+ CD34dim",
                   "#762a83", #"Erythroid_CD34+ CD71+ GlyA-"
                  "#f28ac6", #"Erythroid_CD34- CD71lo GlyA+ "
                   "#20b2aa", #"Naïve B-cells",
                   "#1cad15", # "Naive CD4+ T-cell",
                   "#b2df8a", #"CD8+ Effector Memory",
                   "#d8db02") #"Mature NK cell_CD56- CD16+ CD3-"

show_col(palette.lscx)

DimPlot(res, reduction = "umap", 
        group.by = "Clusters", 
        #split.by = "type.lsc",
        cols = palette.lscx,
        label = F)

5. Make Fig. 6

Fig. 6a. overview of specimens in a table

res@meta.data %>%
  as_tibble() %>%
  dplyr::select(htb, type.lsc, type.pheno, type.tissue, type.disease) %>%
  unique() %>%
  DT::datatable()

Fig. 6b. umap of all subgroups

res$type.lsc <- factor(res$type.lsc, levels = c("Normal", "Type-A", "Type-B", "Type-B-R", "unknown"))
res$type.disease <- factor(res$type.disease, levels = c("Normal", "Dx", "Rl-chemo", "Rl-ven", "unknown"))

resr <- res %>%
  SetIdent(., value = "type.lsc") %>%
  subset(., idents = c("Normal","Type-A", "Type-B", "Type-B-R"))

resr %>%
  DimPlot(., reduction = "umap", 
        group.by = "Clusters", 
        split.by = "type.lsc",
        cols = palette.lscx,
        label = F)

ggplot(resr@meta.data, aes(x = type.lsc, fill = Clusters)) + 
  geom_bar(position = "fill",stat = "count") +
  scale_y_continuous(labels = scales::percent_format()) +
  cowplot::theme_cowplot() +
  scale_color_manual(values = palette.lscx, aesthetics = c("colour", "fill"))

Fig. 6c. RNA velocity analysis

Fig. 6d. gprofiler analysis

Fig. 6e. Hypergeometric analysis of LSC signatures

Fig. 6f. Paired dx and rl specimens from ven/aza

res.1261 <- res %>%
  SetIdent(., value = "htb") %>%
  subset(., idents = c("1261"))

res.1467 <- res %>%
  SetIdent(., value = "htb") %>%
  subset(., idents = c("1467"))

p1 <- res.1261 %>%
  FeaturePlot(.,
              "HOXA9",
              reduction = "umap",
              split.by = "type.disease")

p2 <- res.1467 %>%
  FeaturePlot(.,
              "HOXA9",
              reduction = "umap",
              split.by = "type.disease")

p <- ggarrange(p1, p2, ncol = 1)

annotate_figure(p,
                left = "HTB-1467                               HTB-1261")

For LSCx project

For NLRP3

For Pollyea