Analysis of LSCx_v1_20210125 for Fig.6

1. Set up
2. Input Seurat object
3. Initial check
4. Clustify
5. Make Fig. 6
For LSCx project
For NLRP3
For Pollyea

1. Set up

library(biomaRt)
library(knitr)
library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──

## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.0.6     ✓ dplyr   1.0.3
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x dplyr::select() masks biomaRt::select()

library(dplyr)
library(Seurat)

## Registered S3 method overwritten by 'spatstat':
##   method     from
##   print.boxx cli

## Attaching SeuratObject

#remotes::install_github("rnabioco/clustifyrdata")
library(clustifyr)
library(RColorBrewer)
library(gprofiler2)
library(scales)

## 
## Attaching package: 'scales'

## The following object is masked from 'package:purrr':
## 
##     discard

## The following object is masked from 'package:readr':
## 
##     col_factor

library(ggpubr)
library(jpeg)

# opts_chunk$set(engine.path = '/usr/local/bin/python3')
# knitr::opts_chunk$set(cache = TRUE)
# # knitr::opts_chunk$set(echo = FALSE)
# knitr::opts_chunk$set(warning = FALSE)
# knitr::opts_chunk$set(message = FALSE)
# knitr::opts_chunk$set(cache.lazy = FALSE)

2. Input Seurat object

# Read in pre-processed and harmonlized object by Austin
lscx <- readRDS("~/Documents/Lab_data/P_LSCx/LSCx-CITEseq/from_Austin/LSCx_version1_harmonized_clustered_downsampled.rds") # to make sure all of us are starting on same object.

# change the orig.ident to a cool name
lscx@meta.data$orig.ident <- "lscx"

# Read in csv file storing new meta.data features needed for fig.6
meta.suppl <- read.csv("~/Documents/Lab_data/P_LSCx/LSCx-CITEseq/ssp_analysis/lscx_meta_clinic_supple_v1.csv")

# Add in new features into the meta.data file
cell.id <- rownames(lscx@meta.data) #store cell id
lscx@meta.data <- left_join(lscx@meta.data, meta.suppl, by = "sample") # left_join new features into meta.data table
rownames(lscx@meta.data) <- cell.id # add back cell id

# Get some useful variables
type.lsc <- unique(lscx$type.lsc)
type.pheno <- unique(lscx$type.pheno)
type.tissue <- unique(lscx$type.tissue)
type.disease <- unique(lscx$type.disease)
sample <- unique(lscx$sample)

# get adt
adt <- purrr::map_chr(rownames(lscx@assays$ADT@counts), ~ paste0("adt_", .)) # add "adt_" in front of all antigens. 

# Check new features added
type.lsc

## [1] "unknown"  "Type-A"   "Type-B"   "Type-B-R" "Normal"

type.pheno

## [1] "Prim"    "Mono"    "unknown" "MMP"     "Normal"

type.tissue

## [1] "AML" "HTB" "NBM"

type.disease

## [1] "Rl-chemo" "Dx"       "unknown"  "Rl-ven"   "Normal"

#saveRDS(lscx, "~/Documents/Lab_data/P_LSCx/LSCx-CITEseq/ssp_analysis/LSCx_version1_downsampled_20210125_harmonized_clustered_ssp.Rds")

3. Initial check

# According to Austin, $umap is the reduction to use and it is in fact the harmony-based umap
# despite the "goofy" name, he said...

# Plot all adt markers to get an intial sense of the clusters
lscx %>%
  SetIdent(., value = "type.tissue") %>%
  subset(., idents = c("AML", "HTB")) %>%
  FeaturePlot(., 
              adt,
              reduction = "umap",
              min.cutoff = "q5", 
              max.cutoff = "q95",
              pt.size = 0.1)

## Warning in FeaturePlot(., adt, reduction = "umap", min.cutoff = "q5", max.cutoff
## = "q95", : All cells have the same value (0) of adt_CD70.

## Warning in FeaturePlot(., adt, reduction = "umap", min.cutoff = "q5", max.cutoff
## = "q95", : All cells have the same value (0) of adt_CD27.

# DimPlot(lscx, reduction = "umap", split.by = "type.lsc") # Austin said always use reduction = "umap" to project the harmonized umap
# DimPlot(lscx, reduction = "umap", split.by = "type.pheno")
# DimPlot(lscx, reduction = "umap", split.by = "type.tissue")
# DimPlot(lscx, reduction = "umap", split.by = "type.disease")

4. Clustify

4.1. Trying different compute methods

# Download hema_microarray data from the DMAP study (Novershtern et al.Cell 2011)
dmap <- clustifyrdatahub::ref_hema_microarray()

## snapshotDate(): 2020-10-27
## snapshotDate(): 2020-10-27

## see ?clustifyrdatahub and browseVignettes('clustifyrdatahub') for documentation

## loading from cache

# clustify using cosine
res <- clustify(lscx, 
                ref_mat = dmap, #can also use "cbmc_ref"
                cluster_col = "seurat_clusters",
                #threshold = 0, #let clustify assign threshold 
                rename_prefix = "hema_microarray_cosine",
                compute_method = "cosine", #spearman, pearson, kendall, cosine
                verbose = T)

## using # of genes: 1223

## using threshold of 0.5

unique(res$hema_microarray_cosine_type)

## [1] "Hematopoietic stem cell_CD133+ CD34dim"
## [2] "Monocyte"                              
## [3] "Erythroid_CD34+ CD71+ GlyA-"           
## [4] "Naïve B-cells"                         
## [5] "CD8+ Effector Memory"                  
## [6] "Erythroid_CD34- CD71+ GlyA+"           
## [7] "unassigned"

# clustify using spearman
res <- clustify(lscx, 
                ref_mat = dmap, #can also use "cbmc_ref"
                cluster_col = "seurat_clusters",
                #threshold = 0, #let clustify assign threshold 
                rename_prefix = "hema_microarray_spearman",
                compute_method = "spearman", #spearman, pearson, kendall, cosine
                verbose = T)

## using # of genes: 1223

## using threshold of 0.54

unique(res$hema_microarray_spearman_type)

## [1] "Hematopoietic stem cell_CD133+ CD34dim"
## [2] "Monocyte"                              
## [3] "Naïve B-cells"                         
## [4] "CD8+ Effector Memory RA"               
## [5] "Erythroid_CD34- CD71+ GlyA-"           
## [6] "unassigned"                            
## [7] "CD4+ Central Memory"

# clustify using kendall
res <- clustify(lscx, 
                ref_mat = dmap, #can also use "cbmc_ref"
                cluster_col = "seurat_clusters",
                #threshold = 0, #let clustify assign threshold 
                rename_prefix = "hema_microarray_kendall",
                compute_method = "kendall", #spearman, pearson, kendall, cosine
                verbose = T)

## using # of genes: 1223

## using threshold of 0.39

unique(res$hema_microarray_kendall_type)

## [1] "Hematopoietic stem cell_CD133+ CD34dim"
## [2] "Monocyte"                              
## [3] "Naïve B-cells"                         
## [4] "CD8+ Effector Memory RA"               
## [5] "Erythroid_CD34- CD71+ GlyA-"           
## [6] "unassigned"                            
## [7] "CD4+ Central Memory"

# clustify using pearson
res <- clustify(lscx, 
                ref_mat = dmap, #can also use "cbmc_ref"
                cluster_col = "seurat_clusters",
                #threshold = 0.2, #let clustify assign threshold 
                rename_prefix = "hema_microarray_pearson",
                compute_method = "pearson", #spearman, pearson, kendall, cosine
                verbose = T)

## using # of genes: 1223

## using threshold of 0.52

unique(res$hema_microarray_pearson_type)

## [1] "Hematopoietic stem cell_CD133+ CD34dim"
## [2] "Myeloid Dendritic Cell"                
## [3] "unassigned"                            
## [4] "Naïve B-cells"                         
## [5] "Monocyte"                              
## [6] "CD8+ Effector Memory"                  
## [7] "Erythroid_CD34- CD71lo GlyA+"          
## [8] "Mature NK cell_CD56- CD16+ CD3-"       
## [9] "Naive CD4+ T-cell"

# pearson seems to give us best assignments at auto-assigned threshold

4.2. Trying different threshold settings using the pearson method

# Set threshold to many values and test their influence on assignments
res <- clustify(lscx, 
                ref_mat = dmap, #can also use "cbmc_ref"
                cluster_col = "seurat_clusters",
                threshold = 0.1, # threshold settings 0.1-0.4 gave same best results
                rename_prefix = "hema_microarray_pearson_0.1",
                compute_method = "pearson", #spearman, pearson, kendall, cosine
                verbose = T)

## using # of genes: 1223

# Check the assignments
res@meta.data %>%
  as_tibble() %>% # drop the rown names
  dplyr::select(c("sct_pca_snn_res.0.2", "seurat_clusters", "hema_microarray_pearson_0.1_type")) %>%
  unique() %>%
  arrange(seurat_clusters)

## # A tibble: 11 x 3
##    sct_pca_snn_res.0.2 seurat_clusters hema_microarray_pearson_0.1_type        
##    <fct>               <chr>           <chr>                                   
##  1 0                   0               "Monocyte"                              
##  2 1                   1               "Hematopoietic stem cell_CD133+ CD34dim"
##  3 10                  10              "Naive CD4+ T-cell"                     
##  4 2                   2               "Colony Forming Unit-Monocyte "         
##  5 3                   3               "CD8+ Effector Memory"                  
##  6 4                   4               "Myeloid Dendritic Cell"                
##  7 5                   5               "Erythroid_CD34+ CD71+ GlyA-"           
##  8 6                   6               "Naïve B-cells"                         
##  9 7                   7               "Erythroid_CD34- CD71lo GlyA+"          
## 10 8                   8               "Mature NK cell_CD56- CD16+ CD3-"       
## 11 9                   9               "Hematopoietic stem cell_CD133+ CD34dim"

# Display clusters 
DimPlot(res, reduction = "umap", 
        group.by = "hema_microarray_pearson_0.1_type")

4.3. Simplify cluster names

current.cluster.names <- unique(res$hema_microarray_pearson_0.1_type)

new.cluster.names <- c("Primitive", #"Hematopoietic stem cell_CD133+ CD34dim",
                       "Myeloid Dendritic Cell",# "Myeloid Dendritic Cell",
                       "Erythroid (CD34+,CD71+,GlyA-)", # "Erythroid_CD34+ CD71+ GlyA-",
                       "CFU-Mono",#"Colony Forming Unit-Monocyte ",
                       "B-cells", # "Naïve B-cells",
                       "Monocytic", #"Monocyte"
                       "CD8+ T-cells",# "CD8+ Effector Memory",
                       "Erythroid (CD34-,CD71lo,GlyA+)", # "Erythroid_CD34- CD71lo GlyA+",
                       "NK-cells",# "Mature NK cell_CD56- CD16+ CD3-"
                        "CD4+ T-cells")# "Naive CD4+ T-cell",

res$Clusters <- plyr::mapvalues(x=res$hema_microarray_pearson_0.1_type,
                                                        from = current.cluster.names,
                                                        to = new.cluster.names)# Check the assignments
res@meta.data %>%
  as_tibble() %>% # drop the rown names
  dplyr::select(c("seurat_clusters", "hema_microarray_pearson_0.1_type", "Clusters")) %>%
  unique() %>%
  arrange(seurat_clusters)

## # A tibble: 11 x 3
##    seurat_clusters hema_microarray_pearson_0.1_type    Clusters                 
##    <chr>           <chr>                               <chr>                    
##  1 0               "Monocyte"                          Monocytic                
##  2 1               "Hematopoietic stem cell_CD133+ CD… Primitive                
##  3 10              "Naive CD4+ T-cell"                 CD4+ T-cells             
##  4 2               "Colony Forming Unit-Monocyte "     CFU-Mono                 
##  5 3               "CD8+ Effector Memory"              CD8+ T-cells             
##  6 4               "Myeloid Dendritic Cell"            Myeloid Dendritic Cell   
##  7 5               "Erythroid_CD34+ CD71+ GlyA-"       Erythroid (CD34+,CD71+,G…
##  8 6               "Naïve B-cells"                     B-cells                  
##  9 7               "Erythroid_CD34- CD71lo GlyA+"      Erythroid (CD34-,CD71lo,…
## 10 8               "Mature NK cell_CD56- CD16+ CD3-"   NK-cells                 
## 11 9               "Hematopoietic stem cell_CD133+ CD… Primitive

# Display clusters 
DimPlot(res, reduction = "umap", 
        group.by = "Clusters")

4.4. Assgin new color schemes for the clusters

level.lscx <- c("Monocytic",
                "CFU-Mono",
                "Myeloid Dendritic Cell",
                "Primitive",
                "Erythroid (CD34+,CD71+,GlyA-)",
                "Erythroid (CD34-,CD71lo,GlyA+)",
                "B-cells",
                "CD4+ T-cells",
                "CD8+ T-cells",
                "NK-cells"
                )

res$Clusters <- factor(res$Clusters, levels = level.lscx)

palette.lscx <- c( "#b30024", #"Monocyte",
                  "#e31a1c",#"Colony Forming Unit-Monocyte ",
                   "#ff7f00", #"Myeloid Dendritic Cell",
                   "#08519c",#"Hematopoietic stem cell_CD133+ CD34dim",
                   "#762a83", #"Erythroid_CD34+ CD71+ GlyA-"
                  "#f28ac6", #"Erythroid_CD34- CD71lo GlyA+ "
                   "#20b2aa", #"Naïve B-cells",
                   "#1cad15", # "Naive CD4+ T-cell",
                   "#b2df8a", #"CD8+ Effector Memory",
                   "#d8db02") #"Mature NK cell_CD56- CD16+ CD3-"

show_col(palette.lscx)

DimPlot(res, reduction = "umap", 
        group.by = "Clusters", 
        #split.by = "type.lsc",
        cols = palette.lscx,
        label = F)

5. Make Fig. 6

Fig. 6a. overview of specimens in a table

res@meta.data %>%
  as_tibble() %>%
  dplyr::select(htb, type.lsc, type.pheno, type.tissue, type.disease) %>%
  unique() %>%
  DT::datatable()

Fig. 6b. umap of all subgroups

res$type.lsc <- factor(res$type.lsc, levels = c("Normal", "Type-A", "Type-B", "Type-B-R", "unknown"))
res$type.disease <- factor(res$type.disease, levels = c("Normal", "Dx", "Rl-chemo", "Rl-ven", "unknown"))

resr <- res %>%
  SetIdent(., value = "type.lsc") %>%
  subset(., idents = c("Normal","Type-A", "Type-B", "Type-B-R"))

resr %>%
  DimPlot(., reduction = "umap", 
        group.by = "Clusters", 
        split.by = "type.lsc",
        cols = palette.lscx,
        label = F)

ggplot(resr@meta.data, aes(x = type.lsc, fill = Clusters)) + 
  geom_bar(position = "fill",stat = "count") +
  scale_y_continuous(labels = scales::percent_format()) +
  cowplot::theme_cowplot() +
  scale_color_manual(values = palette.lscx, aesthetics = c("colour", "fill"))

Fig. 6c. RNA velocity analysis

Fig. 6d. gprofiler analysis

Fig. 6e. Hypergeometric analysis of LSC signatures

Fig. 6f. Paired dx and rl specimens from ven/aza

res.1261 <- res %>%
  SetIdent(., value = "htb") %>%
  subset(., idents = c("1261"))

res.1467 <- res %>%
  SetIdent(., value = "htb") %>%
  subset(., idents = c("1467"))

p1 <- res.1261 %>%
  FeaturePlot(.,
              "HOXA9",
              reduction = "umap",
              split.by = "type.disease")

p2 <- res.1467 %>%
  FeaturePlot(.,
              "HOXA9",
              reduction = "umap",
              split.by = "type.disease")

p <- ggarrange(p1, p2, ncol = 1)

annotate_figure(p,
                left = "HTB-1467                               HTB-1261")

Analysis of LSCx_v1_20210125 for Fig.6

02 February, 2021

1. Set up

2. Input Seurat object

3. Initial check

4. Clustify

4.1. Trying different compute methods

4.2. Trying different threshold settings using the pearson method

4.3. Simplify cluster names

4.4. Assgin new color schemes for the clusters

5. Make Fig. 6

Fig. 6a. overview of specimens in a table

Fig. 6b. umap of all subgroups

Fig. 6c. RNA velocity analysis

Fig. 6d. gprofiler analysis

Fig. 6e. Hypergeometric analysis of LSC signatures

Fig. 6f. Paired dx and rl specimens from ven/aza

For LSCx project

For NLRP3

For Pollyea