# Load softwares
library(biomaRt)
library(knitr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.0.6 ✓ dplyr 1.0.4
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x dplyr::select() masks biomaRt::select()
library(dplyr)
library(Seurat)
## Registered S3 method overwritten by 'spatstat':
## method from
## print.boxx cli
## Attaching SeuratObject
library(clustifyr)
library(RColorBrewer)
library(gprofiler2)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
library(ggpubr)
library(jpeg)
library(leidenbase)
library(monocle3)
## Loading required package: Biobase
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: SingleCellExperiment
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
## The following object is masked from 'package:dplyr':
##
## count
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## The following object is masked from 'package:Biobase':
##
## rowMedians
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:purrr':
##
## reduce
## Loading required package: GenomeInfoDb
##
## Attaching package: 'SummarizedExperiment'
## The following object is masked from 'package:SeuratObject':
##
## Assays
## The following object is masked from 'package:Seurat':
##
## Assays
##
## Attaching package: 'monocle3'
## The following objects are masked from 'package:Biobase':
##
## exprs, fData, fData<-, pData, pData<-
library(SeuratWrappers)
# define color palette
palette.lscx <- c( "#b30024", #"Monocyte",
"#fa1e20",#"Colony Forming Unit-Monocyte ",
"#ff7f00", #"Myeloid Dendritic Cell",
"#0b47d4",#"Hematopoietic stem cell_CD133+ CD34dim",
"#5a95d1", #"Erythroid_CD34+ CD71+ GlyA-"
"#f28ac6", #"Erythroid_CD34- CD71lo GlyA+ "
"#20b2aa", #"Naïve B-cells",
"#1cad15", # "Naive CD4+ T-cell",
"#b2df8a", #"CD8+ Effector Memory",
"#d8db02") #"Mature NK cell_CD56- CD16+ CD3-"
# define myeloid subpopulations:
myeloid <- c("Mono",
"CFU-Mono",
"Prim (CD34-dim)",
"Prim (CD34-high)"
)
palette.myeloid <- c("#b30024", #"Monocyte",
"#fa1e20",#"Colony Forming Unit-Monocyte ",
"#0b47d4",#"Hematopoietic stem cell_CD133+ CD34dim",
"#5a95d1") #"Erythroid_CD34+ CD71+ GlyA-"
hema <- clustifyrdatahub::ref_hema_microarray()
## snapshotDate(): 2020-10-27
## snapshotDate(): 2020-10-27
## see ?clustifyrdatahub and browseVignettes('clustifyrdatahub') for documentation
## loading from cache
new.cluster.names <- c("Monocytic", #"Monocyte"
"CFU-Mono",#"Colony Forming Unit-Monocyte "
"Prim (CD34-dim)", #"Hematopoietic stem cell_CD133+ CD34dim",
"Prim (CD34-high)") # "Erythroid_CD34+ CD71+ GlyA-",
# read in seurat object
res <- readRDS("~/Documents/Lab_data/P_LSCx/LSCx-CITEseq/ssp_analysis/SSP_LSCx_version1_downsampled_20210125_harmonized_clustered_res.Rds") %>%
SetIdent(., value = "Subpopulations") %>%
subset(., idents = myeloid)
# SetIdent(., value = "type.pheno") %>%
# subset(., idents = c("MMP", "Normal"))
res@meta.data$type.full <- paste(res@meta.data$type.lsc, res@meta.data$htb, res@meta.data$type.disease, sep = "_")
# strip off all reductions from upstream analysis
res@reductions$pca <- NULL
res@reductions$pca_umap <- NULL
res@reductions$harmony <- NULL
res@reductions$umap <- NULL
sample.n <- unique(dplyr::filter(res@meta.data, type.lsc == "Normal")$type.full)
sample.a <- unique(dplyr::filter(res@meta.data, type.lsc == "Type-A")$type.full)
sample.b <- unique(dplyr::filter(res@meta.data, type.lsc == "Type-B")$type.full)
sample.br <- unique(dplyr::filter(res@meta.data, type.lsc == "Type-B-R")$type.full)
for (i in sample.n){ # unique(res@meta.data$htb)
# Make monocle 3 object cds
cds <- res %>%
SetIdent(., value = "type.full") %>%
subset(., idents = i) %>% # subset individual specimens
as.cell_data_set() # Convert Seurat obejct into Monocle3 object
cds@colData$Subpopulations <- factor(cds@colData$Subpopulations, levels = myeloid)
# preprocess
cds <- preprocess_cds(cds,
method = "PCA", # PCA or LSI(latent semantic indexing)
num_dim = 50,
norm_method = "log")
# # batch correction using run info
# cds <- align_cds(cds,
# preprocess_method = "PCA",
# alignment_group = "htb") #???
# reduce dimensions
cds <- reduce_dimension(cds,
preprocess_method = "PCA")
# Run cluster and learn_graph
cds <- cluster_cells(cds = cds, reduction_method = "UMAP")
# # Run clustify
# cds <- clustify(cds,
# ref_mat = hema , #or "cbmc_ref"
# cluster_col = "Clusters",
# threshold = 0.1, # threshold settings 0.1-0.4 gave same best results
# rename_prefix = "hema_microarray_pearson_0.1",
# compute_method = "pearson", #spearman, pearson, kendall, cosine
# verbose = T)
#
# # Rename clusters with shorter name
# cds@colData$Subpopulations.new <- plyr::mapvalues(x=cds@colData$hema_microarray_pearson_0.1_type,
# from = unique(cds@colData$hema_microarray_pearson_0.1_type),
# to = new.cluster.names)# Check the assignments
#
# Learn graphs
cds <- learn_graph(cds, use_partition = T)
# Initial check before pick root nodes
pdf(paste("~/Documents/Lab_data/P_LSCx/LSCx-CITEseq/ssp_analysis/monocle3_output/",i,"_monocle3.pdf", sep = ""), width = 3, height = 3)
p <- plot_cells(cds,
reduction_method = "UMAP",
color_cells_by = "Subpopulations", # cluster or partition or Subpopulations
#group_cells_by = "Subpopulations",
label_groups_by_cluster=F,
label_leaves=F,
label_branch_points=F) +
#scale_color_manual(values = palette.myeloid) +
ggtitle(i)
print(p)
dev.off()
print(p)
}
##
|
| | 0%
|
|======================================================================| 100%
## Warning: `select_()` is deprecated as of dplyr 0.7.0.
## Please use `select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
##
|
| | 0%
|
|======================================================================| 100%
for (i in sample.a){ # unique(res@meta.data$htb)
# Make monocle 3 object cds
cds <- res %>%
SetIdent(., value = "type.full") %>%
subset(., idents = i) %>% # subset individual specimens
as.cell_data_set() # Convert Seurat obejct into Monocle3 object
cds@colData$Subpopulations <- factor(cds@colData$Subpopulations, levels = myeloid)
# preprocess
cds <- preprocess_cds(cds,
method = "PCA", # PCA or LSI(latent semantic indexing)
num_dim = 50,
norm_method = "log")
# # batch correction using run info
# cds <- align_cds(cds,
# preprocess_method = "PCA",
# alignment_group = "htb") #???
# reduce dimensions
cds <- reduce_dimension(cds,
preprocess_method = "PCA")
# Run cluster and learn_graph
cds <- cluster_cells(cds = cds, reduction_method = "UMAP")
# # Run clustify
# cds <- clustify(cds,
# ref_mat = hema , #or "cbmc_ref"
# cluster_col = "Clusters",
# threshold = 0.1, # threshold settings 0.1-0.4 gave same best results
# rename_prefix = "hema_microarray_pearson_0.1",
# compute_method = "pearson", #spearman, pearson, kendall, cosine
# verbose = T)
#
# # Rename clusters with shorter name
# cds@colData$Subpopulations.new <- plyr::mapvalues(x=cds@colData$hema_microarray_pearson_0.1_type,
# from = unique(cds@colData$hema_microarray_pearson_0.1_type),
# to = new.cluster.names)# Check the assignments
#
# Learn graphs
cds <- learn_graph(cds, use_partition = T)
# Initial check before pick root nodes
pdf(paste("~/Documents/Lab_data/P_LSCx/LSCx-CITEseq/ssp_analysis/monocle3_output/",i,"_monocle3.pdf", sep = ""), width = 3, height = 3)
p <- plot_cells(cds,
reduction_method = "UMAP",
color_cells_by = "Subpopulations", # cluster or partition or Subpopulations
#group_cells_by = "Subpopulations",
label_groups_by_cluster=F,
label_leaves=F,
label_branch_points=F) +
#scale_color_manual(values = palette.myeloid) +
ggtitle(i)
print(p)
dev.off()
print(p)
}
##
|
| | 0%
|
|======================================================================| 100%
##
|
| | 0%
|
|======================================================================| 100%
##
|
| | 0%
|
|======================================================================| 100%
##
|
| | 0%
|
|======================================================================| 100%
##
|
| | 0%
|
|======================================================================| 100%
for (i in sample.b){ # unique(res@meta.data$htb)
# Make monocle 3 object cds
cds <- res %>%
SetIdent(., value = "type.full") %>%
subset(., idents = i) %>% # subset individual specimens
as.cell_data_set() # Convert Seurat obejct into Monocle3 object
cds@colData$Subpopulations <- factor(cds@colData$Subpopulations, levels = myeloid)
# preprocess
cds <- preprocess_cds(cds,
method = "PCA", # PCA or LSI(latent semantic indexing)
num_dim = 50,
norm_method = "log")
# # batch correction using run info
# cds <- align_cds(cds,
# preprocess_method = "PCA",
# alignment_group = "htb") #???
# reduce dimensions
cds <- reduce_dimension(cds,
preprocess_method = "PCA")
# Run cluster and learn_graph
cds <- cluster_cells(cds = cds, reduction_method = "UMAP")
# Run clustify
# cds <- clustify(cds,
# ref_mat = hema , #or "cbmc_ref"
# cluster_col = "Clusters",
# threshold = 0.1, # threshold settings 0.1-0.4 gave same best results
# rename_prefix = "hema_microarray_pearson_0.1",
# compute_method = "pearson", #spearman, pearson, kendall, cosine
# verbose = T)
#
# # Rename clusters with shorter name
# cds@colData$Subpopulations.new <- plyr::mapvalues(x=cds@colData$hema_microarray_pearson_0.1_type,
# from = unique(cds@colData$hema_microarray_pearson_0.1_type),
# to = new.cluster.names)# Check the assignments
#
# Learn graphs
cds <- learn_graph(cds, use_partition = T)
# Initial check before pick root nodes
pdf(paste("~/Documents/Lab_data/P_LSCx/LSCx-CITEseq/ssp_analysis/monocle3_output/",i,"_monocle3.pdf", sep = ""), width = 3, height = 3)
p <- plot_cells(cds,
reduction_method = "UMAP",
color_cells_by = "Subpopulations", # cluster or partition or Subpopulations
#group_cells_by = "Subpopulations",
label_groups_by_cluster=F,
label_leaves=F,
label_branch_points=F) +
#scale_color_manual(values = palette.myeloid) +
ggtitle(i)
print(p)
dev.off()
print(p)
}
##
|
| | 0%
|
|======================================================================| 100%
##
|
| | 0%
|
|======================================================================| 100%
##
|
| | 0%
|
|======================================================================| 100%
for (i in sample.br){ # unique(res@meta.data$htb)
# Make monocle 3 object cds
cds <- res %>%
SetIdent(., value = "type.full") %>%
subset(., idents = i) %>% # subset individual specimens
as.cell_data_set() # Convert Seurat obejct into Monocle3 object
cds@colData$Subpopulations <- factor(cds@colData$Subpopulations, levels = myeloid)
# preprocess
cds <- preprocess_cds(cds,
method = "PCA", # PCA or LSI(latent semantic indexing)
num_dim = 50,
norm_method = "log")
# # batch correction using run info
# cds <- align_cds(cds,
# preprocess_method = "PCA",
# alignment_group = "htb") #???
# reduce dimensions
cds <- reduce_dimension(cds,
preprocess_method = "PCA")
# Run cluster and learn_graph
cds <- cluster_cells(cds = cds, reduction_method = "UMAP")
# # Run clustify
# cds <- clustify(cds,
# ref_mat = hema , #or "cbmc_ref"
# cluster_col = "Clusters",
# threshold = 0.1, # threshold settings 0.1-0.4 gave same best results
# rename_prefix = "hema_microarray_pearson_0.1",
# compute_method = "pearson", #spearman, pearson, kendall, cosine
# verbose = T)
#
# # Rename clusters with shorter name
# cds@colData$Subpopulations.new <- plyr::mapvalues(x=cds@colData$hema_microarray_pearson_0.1_type,
# from = unique(cds@colData$hema_microarray_pearson_0.1_type),
# to = new.cluster.names)# Check the assignments
#
# Learn graphs
cds <- learn_graph(cds, use_partition = T)
# Initial check before pick root nodes
pdf(paste("~/Documents/Lab_data/P_LSCx/LSCx-CITEseq/ssp_analysis/monocle3_output/",i,"_monocle3.pdf", sep = ""), width = 3, height = 3)
p <- plot_cells(cds,
reduction_method = "UMAP",
color_cells_by = "Subpopulations", # cluster or partition or Subpopulations
#group_cells_by = "Subpopulations",
label_groups_by_cluster=F,
label_leaves=F,
label_branch_points=F) +
#scale_color_manual(values = palette.myeloid) +
ggtitle(i)
print(p)
dev.off()
print(p)
}
##
|
| | 0%
|
|======================================================================| 100%