1. Set up

# Load softwares
library(biomaRt)
library(knitr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.0.6     ✓ dplyr   1.0.4
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x dplyr::select() masks biomaRt::select()
library(dplyr)
library(Seurat)
## Registered S3 method overwritten by 'spatstat':
##   method     from
##   print.boxx cli
## Attaching SeuratObject
library(clustifyr)
library(RColorBrewer)
library(gprofiler2)
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
library(ggpubr)
library(jpeg)
library(leidenbase)
library(monocle3)
## Loading required package: Biobase
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: SingleCellExperiment
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## The following object is masked from 'package:Biobase':
## 
##     rowMedians
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:purrr':
## 
##     reduce
## Loading required package: GenomeInfoDb
## 
## Attaching package: 'SummarizedExperiment'
## The following object is masked from 'package:SeuratObject':
## 
##     Assays
## The following object is masked from 'package:Seurat':
## 
##     Assays
## 
## Attaching package: 'monocle3'
## The following objects are masked from 'package:Biobase':
## 
##     exprs, fData, fData<-, pData, pData<-
library(SeuratWrappers)


# define color palette
palette.lscx <- c( "#b30024", #"Monocyte",
                  "#fa1e20",#"Colony Forming Unit-Monocyte ",
                   "#ff7f00", #"Myeloid Dendritic Cell",
                   "#0b47d4",#"Hematopoietic stem cell_CD133+ CD34dim",
                   "#5a95d1", #"Erythroid_CD34+ CD71+ GlyA-"
                  "#f28ac6", #"Erythroid_CD34- CD71lo GlyA+ "
                   "#20b2aa", #"Naïve B-cells",
                   "#1cad15", # "Naive CD4+ T-cell",
                   "#b2df8a", #"CD8+ Effector Memory",
                   "#d8db02") #"Mature NK cell_CD56- CD16+ CD3-"

# define myeloid subpopulations:
myeloid <- c("Mono",
             "CFU-Mono",
             "Prim (CD34-dim)",
             "Prim (CD34-high)"
             )

palette.myeloid <- c("#b30024", #"Monocyte",
                  "#fa1e20",#"Colony Forming Unit-Monocyte ",
                   "#0b47d4",#"Hematopoietic stem cell_CD133+ CD34dim",
                   "#5a95d1") #"Erythroid_CD34+ CD71+ GlyA-"

hema <- clustifyrdatahub::ref_hema_microarray()
## snapshotDate(): 2020-10-27
## snapshotDate(): 2020-10-27
## see ?clustifyrdatahub and browseVignettes('clustifyrdatahub') for documentation
## loading from cache
new.cluster.names <- c("Monocytic", #"Monocyte"
                       "CFU-Mono",#"Colony Forming Unit-Monocyte "
                       "Prim (CD34-dim)", #"Hematopoietic stem cell_CD133+ CD34dim",
                       "Prim (CD34-high)") # "Erythroid_CD34+ CD71+ GlyA-",

Import Seurat object (Already filtered, Normalized, Scaled, Harmonized, Clustified)

# read in seurat object
res <- readRDS("~/Documents/Lab_data/P_LSCx/LSCx-CITEseq/ssp_analysis/SSP_LSCx_version1_downsampled_20210125_harmonized_clustered_res.Rds") %>%
  SetIdent(., value = "Subpopulations") %>%
  subset(., idents = myeloid)
  # SetIdent(., value = "type.pheno") %>%
  # subset(., idents = c("MMP", "Normal"))

res@meta.data$type.full <- paste(res@meta.data$type.lsc, res@meta.data$htb, res@meta.data$type.disease, sep = "_")

# strip off all reductions from upstream analysis
res@reductions$pca <- NULL
res@reductions$pca_umap <- NULL
res@reductions$harmony <- NULL
res@reductions$umap <- NULL

Monocle 3 anaysis

Method A (Split into subgroups -> Cluster and Calculate Pseudotime)

Type-A

Type-B

Type-B-R

Method B (Cluster -> Split)

Method C (Split on individual basis)

sample.n <- unique(dplyr::filter(res@meta.data, type.lsc == "Normal")$type.full)
sample.a <- unique(dplyr::filter(res@meta.data, type.lsc == "Type-A")$type.full)
sample.b <- unique(dplyr::filter(res@meta.data, type.lsc == "Type-B")$type.full)
sample.br <- unique(dplyr::filter(res@meta.data, type.lsc == "Type-B-R")$type.full)

Normal BM

for (i in sample.n){ # unique(res@meta.data$htb)
  # Make monocle 3 object cds
  cds <- res %>%
    SetIdent(., value = "type.full") %>%
    subset(., idents = i) %>% # subset individual specimens
    as.cell_data_set() # Convert Seurat obejct into Monocle3 object
  
  cds@colData$Subpopulations <- factor(cds@colData$Subpopulations, levels = myeloid)
  
  # preprocess
  cds <- preprocess_cds(cds, 
                      method = "PCA", # PCA or LSI(latent semantic indexing)
                      num_dim = 50,
                      norm_method = "log")

  # # batch correction using run info
  # cds <- align_cds(cds, 
  #                preprocess_method = "PCA",
  #                alignment_group = "htb") #???

  # reduce dimensions
  cds <- reduce_dimension(cds,
                        preprocess_method = "PCA")


  # Run cluster and learn_graph
  cds <- cluster_cells(cds = cds, reduction_method = "UMAP") 
  
  # # Run clustify
  # cds <- clustify(cds, 
  #               ref_mat = hema , #or "cbmc_ref"
  #               cluster_col = "Clusters",
  #               threshold = 0.1, # threshold settings 0.1-0.4 gave same best results
  #               rename_prefix = "hema_microarray_pearson_0.1",
  #               compute_method = "pearson", #spearman, pearson, kendall, cosine
  #               verbose = T)
  # 
  # # Rename clusters with shorter name
  # cds@colData$Subpopulations.new <- plyr::mapvalues(x=cds@colData$hema_microarray_pearson_0.1_type,
  #                                                       from = unique(cds@colData$hema_microarray_pearson_0.1_type),
  #                                                       to = new.cluster.names)# Check the assignments
  # 
  # Learn graphs
  cds <- learn_graph(cds, use_partition = T)

  # Initial check before pick root nodes
  pdf(paste("~/Documents/Lab_data/P_LSCx/LSCx-CITEseq/ssp_analysis/monocle3_output/",i,"_monocle3.pdf", sep = ""), width = 3, height = 3)
  
  p <- plot_cells(cds, 
           reduction_method = "UMAP",
           color_cells_by = "Subpopulations", # cluster or partition or Subpopulations
           #group_cells_by = "Subpopulations",
           label_groups_by_cluster=F,
           label_leaves=F,
           label_branch_points=F) +
    #scale_color_manual(values = palette.myeloid) +
    ggtitle(i)

  print(p)
  dev.off()
  
  print(p)
}
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
## Warning: `select_()` is deprecated as of dplyr 0.7.0.
## Please use `select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%

Type-A

for (i in sample.a){ # unique(res@meta.data$htb)
  # Make monocle 3 object cds
  cds <- res %>%
    SetIdent(., value = "type.full") %>%
    subset(., idents = i) %>% # subset individual specimens
    as.cell_data_set() # Convert Seurat obejct into Monocle3 object
  
  cds@colData$Subpopulations <- factor(cds@colData$Subpopulations, levels = myeloid)
  
  # preprocess
  cds <- preprocess_cds(cds, 
                      method = "PCA", # PCA or LSI(latent semantic indexing)
                      num_dim = 50,
                      norm_method = "log")

  # # batch correction using run info
  # cds <- align_cds(cds, 
  #                preprocess_method = "PCA",
  #                alignment_group = "htb") #???

  # reduce dimensions
  cds <- reduce_dimension(cds,
                        preprocess_method = "PCA")


  # Run cluster and learn_graph
  cds <- cluster_cells(cds = cds, reduction_method = "UMAP") 
  
  # # Run clustify
  # cds <- clustify(cds, 
  #               ref_mat = hema , #or "cbmc_ref"
  #               cluster_col = "Clusters",
  #               threshold = 0.1, # threshold settings 0.1-0.4 gave same best results
  #               rename_prefix = "hema_microarray_pearson_0.1",
  #               compute_method = "pearson", #spearman, pearson, kendall, cosine
  #               verbose = T)
  # 
  # # Rename clusters with shorter name
  # cds@colData$Subpopulations.new <- plyr::mapvalues(x=cds@colData$hema_microarray_pearson_0.1_type,
  #                                                       from = unique(cds@colData$hema_microarray_pearson_0.1_type),
  #                                                       to = new.cluster.names)# Check the assignments
  # 
  # Learn graphs
  cds <- learn_graph(cds, use_partition = T)

  # Initial check before pick root nodes
  pdf(paste("~/Documents/Lab_data/P_LSCx/LSCx-CITEseq/ssp_analysis/monocle3_output/",i,"_monocle3.pdf", sep = ""), width = 3, height = 3)
  
  p <- plot_cells(cds, 
           reduction_method = "UMAP",
           color_cells_by = "Subpopulations", # cluster or partition or Subpopulations
           #group_cells_by = "Subpopulations",
           label_groups_by_cluster=F,
           label_leaves=F,
           label_branch_points=F) +
    #scale_color_manual(values = palette.myeloid) +
    ggtitle(i)

  print(p)
  dev.off()
  
  print(p)
}
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%

## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%

## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%

## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%

## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%

Type-B

for (i in sample.b){ # unique(res@meta.data$htb)
  # Make monocle 3 object cds
  cds <- res %>%
    SetIdent(., value = "type.full") %>%
    subset(., idents = i) %>% # subset individual specimens
    as.cell_data_set() # Convert Seurat obejct into Monocle3 object
  
  cds@colData$Subpopulations <- factor(cds@colData$Subpopulations, levels = myeloid)
  
  # preprocess
  cds <- preprocess_cds(cds, 
                      method = "PCA", # PCA or LSI(latent semantic indexing)
                      num_dim = 50,
                      norm_method = "log")

  # # batch correction using run info
  # cds <- align_cds(cds, 
  #                preprocess_method = "PCA",
  #                alignment_group = "htb") #???

  # reduce dimensions
  cds <- reduce_dimension(cds,
                        preprocess_method = "PCA")


  # Run cluster and learn_graph
  cds <- cluster_cells(cds = cds, reduction_method = "UMAP") 
  
  # Run clustify
  # cds <- clustify(cds, 
  #               ref_mat = hema , #or "cbmc_ref"
  #               cluster_col = "Clusters",
  #               threshold = 0.1, # threshold settings 0.1-0.4 gave same best results
  #               rename_prefix = "hema_microarray_pearson_0.1",
  #               compute_method = "pearson", #spearman, pearson, kendall, cosine
  #               verbose = T)
  # 
  # # Rename clusters with shorter name
  # cds@colData$Subpopulations.new <- plyr::mapvalues(x=cds@colData$hema_microarray_pearson_0.1_type,
  #                                                       from = unique(cds@colData$hema_microarray_pearson_0.1_type),
  #                                                       to = new.cluster.names)# Check the assignments
  # 
  # Learn graphs
  cds <- learn_graph(cds, use_partition = T)

  # Initial check before pick root nodes
  pdf(paste("~/Documents/Lab_data/P_LSCx/LSCx-CITEseq/ssp_analysis/monocle3_output/",i,"_monocle3.pdf", sep = ""), width = 3, height = 3)
  
  p <- plot_cells(cds, 
           reduction_method = "UMAP",
           color_cells_by = "Subpopulations", # cluster or partition or Subpopulations
           #group_cells_by = "Subpopulations",
           label_groups_by_cluster=F,
           label_leaves=F,
           label_branch_points=F) +
    #scale_color_manual(values = palette.myeloid) +
    ggtitle(i)

  print(p)
  dev.off()
  
  print(p)
}
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%

## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%

Type-BR

for (i in sample.br){ # unique(res@meta.data$htb)
  # Make monocle 3 object cds
  cds <- res %>%
    SetIdent(., value = "type.full") %>%
    subset(., idents = i) %>% # subset individual specimens
    as.cell_data_set() # Convert Seurat obejct into Monocle3 object
  
  cds@colData$Subpopulations <- factor(cds@colData$Subpopulations, levels = myeloid)
  
  # preprocess
  cds <- preprocess_cds(cds, 
                      method = "PCA", # PCA or LSI(latent semantic indexing)
                      num_dim = 50,
                      norm_method = "log")

  # # batch correction using run info
  # cds <- align_cds(cds, 
  #                preprocess_method = "PCA",
  #                alignment_group = "htb") #???

  # reduce dimensions
  cds <- reduce_dimension(cds,
                        preprocess_method = "PCA")


  # Run cluster and learn_graph
  cds <- cluster_cells(cds = cds, reduction_method = "UMAP") 
  
  # # Run clustify
  # cds <- clustify(cds, 
  #               ref_mat = hema , #or "cbmc_ref"
  #               cluster_col = "Clusters",
  #               threshold = 0.1, # threshold settings 0.1-0.4 gave same best results
  #               rename_prefix = "hema_microarray_pearson_0.1",
  #               compute_method = "pearson", #spearman, pearson, kendall, cosine
  #               verbose = T)
  # 
  # # Rename clusters with shorter name
  # cds@colData$Subpopulations.new <- plyr::mapvalues(x=cds@colData$hema_microarray_pearson_0.1_type,
  #                                                       from = unique(cds@colData$hema_microarray_pearson_0.1_type),
  #                                                       to = new.cluster.names)# Check the assignments
  # 
  # Learn graphs
  cds <- learn_graph(cds, use_partition = T)

  # Initial check before pick root nodes
  pdf(paste("~/Documents/Lab_data/P_LSCx/LSCx-CITEseq/ssp_analysis/monocle3_output/",i,"_monocle3.pdf", sep = ""), width = 3, height = 3)
  
  p <- plot_cells(cds, 
           reduction_method = "UMAP",
           color_cells_by = "Subpopulations", # cluster or partition or Subpopulations
           #group_cells_by = "Subpopulations",
           label_groups_by_cluster=F,
           label_leaves=F,
           label_branch_points=F) +
    #scale_color_manual(values = palette.myeloid) +
    ggtitle(i)

  print(p)
  dev.off()
  
  print(p)
}
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%