Set up

library(biomaRt)
library(knitr)
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0     ✓ purrr   0.3.3
## ✓ tibble  3.0.0     ✓ dplyr   0.8.5
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## Warning: package 'tibble' was built under R version 3.6.2
## ── Conflicts ─────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x dplyr::select() masks biomaRt::select()
library(Seurat)
#devtools::install_github("rnabioco/clustifyrdata")
library(clustifyr)
library(RColorBrewer)
library(gprofiler2)
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
library(ggpubr)
## Loading required package: magrittr
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
library(jpeg)

opts_chunk$set(engine.path = '/usr/local/bin/python3')
knitr::opts_chunk$set(cache = TRUE)
# knitr::opts_chunk$set(echo = FALSE)
knitr::opts_chunk$set(warning = FALSE)
knitr::opts_chunk$set(message = FALSE)
knitr::opts_chunk$set(cache.lazy = FALSE)

Data input

# Define global variables
DIR.407 <- "~/sandbox/uhg/cite-seq/release1.1_20200416/"
uhg_base <- readRDS(paste(DIR.407,"raw/uhg_version1.1_20200416_baseline.Rds", sep = ""))

# # Bug 1: Converst sct_clusters into factors
# uhg_base@meta.data$sct_clusters <- as.factor(uhg_base@meta.data$sct_clusters)
# 
# # Bug 2:
# uhg_base@assays$ADT@counts[is.na(uhg_base@assays$ADT@counts)] <- 0
# uhg_base@assays$ADT@data[is.na(uhg_base@assays$ADT@data)] <- 0
# 
uhg_base@meta.data %>%
  group_by(htb) %>%
  summarise(n=n()) %>%
  arrange(n)
## # A tibble: 20 x 2
##    htb          n
##    <chr>    <int>
##  1 HTB-664    464
##  2 HTB-976    886
##  3 HTB-1285   997
##  4 HTB-483   1002
##  5 HTB-970   1044
##  6 HTB-1283  1089
##  7 HTB-1471  1292
##  8 HTB-1258  1439
##  9 HTB-1526  1439
## 10 HTB-1390  1822
## 11 HTB-1466  2437
## 12 HTB-1325  2744
## 13 HTB-1387  3246
## 14 HTB-1456  4077
## 15 HTB-1308  4759
## 16 HTB-1556  5192
## 17 HTB-1496  7620
## 18 HTB-1375  8147
## 19 HTB-1273  8827
## 20 Anchor   13147
# 
# # replace orig.ident with more relevant info
# uhg_base@meta.data$orig.ident <- paste(uhg_base@meta.data$response,uhg_base@meta.data$htb,sep = "-") 
# 
# relapce NA with anchor
 uhg_base$response <- forcats::fct_explicit_na(uhg_base$response, "Anchor")

# remove anchor
#uhg_base <- SetIdent(uhg_base, value = "response") %>% subset(idents = c("R", "NR"))
# # Subset an objedt wiith 464 cells from each HTB
# uhg_base_10k <- SetIdent(uhg_base, value = "htb") %>% subset(downsample = 464) # 464 is the HTB with least amount of cells

Initial check

# plot all samples
DimPlot(uhg_base, group.by = "htb_response") + xlab("UMAP_1") + ylab("UMAP_2")

DimPlot(uhg_base, group.by = "v1.0_clusters") + xlab("UMAP_1") + ylab("UMAP_2")

DimPlot(uhg_base, group.by = "clusters") + xlab("UMAP_1") + ylab("UMAP_2") # v1.1 clusters

Plot clustifyr results

liberal clusters

#uhg_base@meta.data$named_clusters %>% unique()


# define cell types in order
level.liberal <- c("Monocyte",
                  "Common myeloid progenitor",
                    "HSC_CD133+ CD34dim",
                    "HSC_CD38- CD34+",
                    "Myeloid Dendritic Cell",
                    "CD4+ T cells",
                    "CD8+ T cells",
                    "B cells",
                    "Erythroid_CD34- CD71lo GlyA+")

# convert cell types to factors
uhg_base@meta.data$named_clusters <- factor(uhg_base@meta.data$named_clusters, levels = level.liberal)

# define color palette
color.liberal = c("#e31a1c", #"Monocyte",
                  "#762a83", #"Common myeloid progenitor",
                    "#08519c", #"HSC_CD133+ CD34dim",
                    "mediumblue", #"HSC_CD38- CD34+",
                    "#ff7f00", #"Myeloid Dendritic Cell",
                    "#33a02c", # "CD4+ T cells",
                    "#b2df8a", #"CD8+ T cells",
                    "#20b2aa", #"B cells",#006d2c
                    "#c51b7d") #"Erythroid_CD34- CD71lo GlyA+"

show_col(color.liberal)

p <- DimPlot(uhg_base, group.by = "named_clusters", cols = color.liberal, pt.size = 0.1, label = F, label.size = 3) + xlab("UMAP_1") + ylab("UMAP_2")

print(p)

pdf(paste(DIR.407,"output/","uhg_base_all_clustifyr_liberal.pdf",sep = ""), width = 10, height = 6)
print(p)
dev.off()
## quartz_off_screen 
##                 2
#DimPlot(uhg_base, group.by = "named_clusters", cols = color.liberal, pt.size = 0.1, split.by = "response") + xlab("UMAP_1") + ylab("UMAP_2")

conserved clusters

uhg_base@meta.data$named_subclusters %>% unique()
##  [1] "HSC_CD133+ CD34dim_3"         "B cells"                     
##  [3] "Erythroid_CD34- CD71lo GlyA+" "HSC_CD133+ CD34dim_1"        
##  [5] "HSC_CD38- CD34+"              "Myeloid Dendritic Cell"      
##  [7] "CD8+ T cells"                 "Monocyte_2"                  
##  [9] "HSC_CD133+ CD34dim_6"         "CD4+ T cells"                
## [11] "Common myeloid progenitor_1"  "HSC_CD133+ CD34dim_5"        
## [13] "HSC_CD133+ CD34dim_4"         "Monocyte_1"                  
## [15] "Common myeloid progenitor_2"  "HSC_CD133+ CD34dim_7"        
## [17] "HSC_CD133+ CD34dim_2"         "HSC_CD133+ CD34dim_8"        
## [19] "Common myeloid progenitor_3"
level.conserved <- c("Monocyte_1",
                     "Monocyte_2",
                     "Common myeloid progenitor_1",
                     "Common myeloid progenitor_2",
                     "Common myeloid progenitor_3",
                     "HSC_CD133+ CD34dim_1",
                     "HSC_CD133+ CD34dim_2",
                     "HSC_CD133+ CD34dim_3",
                     "HSC_CD133+ CD34dim_4",
                     "HSC_CD133+ CD34dim_5",
                     "HSC_CD133+ CD34dim_7",
                     "HSC_CD133+ CD34dim_8",
                     "HSC_CD38- CD34+",
                     "Myeloid Dendritic Cell",
                     "CD4+ T cells",
                     "CD8+ T cells",
                     "B cells",
                     "Erythroid_CD34- CD71lo GlyA+",
                     "NA")

# convert cell types to factors
uhg_base@meta.data$named_subclusters <- factor(uhg_base@meta.data$named_subclusters, levels = level.conserved)



# define cell types in order
color.conserved <- c("#e31a1c",#"Monocyte_1",
                    "#bd0026", #"Monocyte_2",
                     "#762a83",#"Common myeloid progenitor_1",
                     "#6a51a3",#"Common myeloid progenitor_2",
                     "#807dba",#"Common myeloid progenitor_3",
                     "#08519c",#"HSC_CD133+ CD34dim_1",
                     "#2171b5",#"HSC_CD133+ CD34dim_2",
                     "#4292c6",#"HSC_CD133+ CD34dim_3",
                     "#6baed6",#"HSC_CD133+ CD34dim_4",
                     "#9ecae1",#"HSC_CD133+ CD34dim_5",
                     "#c6dbef",#"HSC_CD133+ CD34dim_7",
                     "#deebf7",#"HSC_CD133+ CD34dim_8",
                    "mediumblue",#"HSC_CD38- CD34+",
                    "#ff7f00", #"Myeloid Dendritic Cell",
                    "#33a02c", # "CD4+ T cells",
                    "#b2df8a", #"CD8+ T cells",
                    "#20b2aa", #"B cells",
                    "#c51b7d", #"Erythroid_CD34- CD71lo GlyA+"
                    "gray95") #"NA"

show_col(color.conserved)

p <- DimPlot(uhg_base, group.by = "named_subclusters", cols = color.conserved, pt.size = 0.1) + xlab("UMAP_1") + ylab("UMAP_2")

print(p)

pdf(paste(DIR.407,"output/","uhg_base_all_clustifyr_conserved.pdf",sep = ""), width = 10, height = 6)
print(p)
dev.off()
## quartz_off_screen 
##                 2
# DimPlot(uhg_base, group.by = "named_subclusters", cols = color.conserved, pt.size = 0.1, split.by = "response") + xlab("UMAP_1") + ylab("UMAP_2")