# Install packages
install.packages(c("Seurat","dplyr","tidyr","readr","pheatmap"))
# Install Bioconductor packages
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("clusterProfiler")
BiocManager::install("org.Mm.eg.db")
BiocManager::install("org.Hs.eg.db")
# Install CellDART
devtools::install_github("mexchy1000/CellDART", build_vignettes = T, force = T)
# Install STopover
devtools::install_github("bsungwoo/STopover", build_vignettes = T, force = T)library(STopover)
library(CellDART)
library(Seurat)
#> Attaching SeuratObject
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(tidyr)
library(readr)
library(pheatmap)# Make output folder
output_folder_name <- 'Results'
if (!file.exists(output_folder_name)){
dir.create(output_folder_name)
}
fig_dir <- file.path(output_folder_name, 'figures')
if (!file.exists(fig_dir)){
dir.create(fig_dir)
}# Patient name for single-cell data
patient_list_sc <- c('CID4290A','CID4535','CID4465','CID44971')
tissue_type <- c("ER_1","ER_2","TNBC_1","TNBC_2")
# Load single-cell count matrix and metadata
sc_metadata <- read.csv('./Whole_miniatlas_meta.csv')
sc_metadata <- sc_metadata[-c(1),]
rownames(sc_metadata) <- sc_metadata$NAME
sc_metadata <- sc_metadata[-c(1,3)]
sc_metadata$nCount_RNA <- as.numeric(sc_metadata$nCount_RNA)
sc_metadata$nFeature_RNA <- as.numeric(sc_metadata$nFeature_RNA)
# Reverse log transform count matrix to get total normalized count
sc_data <- CreateSeuratObject(counts = expm1(expression_matrix))
# Add metadata and log-normalize
sc_data <- AddMetaData(sc_data, sc_metadata)
sc_data <- NormalizeData(sc_data, normalization.method = "LogNormalize", scale.factor = 1e4)
# Save single-cell Seurat object
readr::write_rds(sc_data, 'sc_data.rds')# Patient name for spatial data
patient_list_sp <- c('CID4290','CID4535','CID4465','CID44971')
tissue_type <- c("ER_1","ER_2","TNBC_1","TNBC_2")
for (i in 1:length(patient_list_sp)){
# Read count matrix
breast_data <- Seurat::ReadMtx(paste0('./filtered_count_matrices/',patient_list_sp[i],
'_filtered_count_matrix/matrix.mtx.gz'),
paste0('./filtered_count_matrices/',patient_list_sp[i],
'_filtered_count_matrix/barcodes.tsv.gz'),
paste0('./filtered_count_matrices/',patient_list_sp[i],
'_filtered_count_matrix/features.tsv.gz'),
feature.column = 1)
breast_data <- Seurat::CreateSeuratObject(counts = breast_data, assay = "Spatial")
# Read Visium images and coordinates
breast_image <- Seurat::Read10X_Image(paste0('./spatial/',
patient_list_sp[i],'_spatial/'))
breast_image <- breast_image[Seurat::Cells(x = breast_data)]
DefaultAssay(object = breast_image) <- 'Spatial'
breast_data[[patient_list_sp[i]]] <- breast_image
breast_data$orig.ident <- patient_list_sp[i]
# Add metadata to the Seurat spatial data
metadata <- read.csv(paste0('./metadata/',patient_list_sp[i],
'_metadata.csv'), header = T)
rownames(metadata) <- metadata$X
metadata <- metadata[,-(1)]
breast_data <- AddMetaData(breast_data, metadata)
breast_data$orig.ident <- factor(breast_data$orig.ident, levels=patient_list_sp[i])
# Normalize the data and save
breast_data <- NormalizeData(breast_data,
normalization.method = "LogNormalize",
scale.factor = 1e4)
readr::write_rds(breast_data, file.path(output_folder_name, paste0('sp_data_',patient_list_sp[i],'.rds')))
}# Load single-cell data (if needed)
sc_data <- readr::read_rds('sc_data.rds')
for (i in 1:length(patient_list_sp)){
# Load spatial data (if needed)
sp_data <- readr::read_rds(paste0('sp_data_',patient_list_sp[i],'.rds'))
# Subset single-cell data to contain only the data obtained from the same patient
sc_data_sel <- sc_data
Idents(sc_data_sel) <- sc_data_sel$Patient
sc_data_sel <- subset(sc_data_sel, idents = patient_list_sc[i])
# Set the pseudospot number as the 5 times the number of spatial spots
npseudo <- 5*dim(sp_data)[2]
sp_data_cellf <- CellDART::pred_cellf_celldart(sp_data=sp_data,
sc_data=sc_data_sel,
outdir = output_folder_name,
env.select = "conda",
env.name = "CellDART",
gpu = TRUE,
metadata_celltype = "celltype_major",
num_markers = 20,
seed_num = 0,
nmix = 10,
npseudo = 20000,
alpha = 0.6,
alpha_lr = 5,
emb_dim = 64,
batch_size = 512,
n_iterations = 3000,
init_train_epoch = 10)
## Save Seurat object with cell fraction
readr::write_rds(sp_data_cellf, file.path(output_folder_name,
paste0('sp_data_cellf_',patient_list_sp[i],'.rds')))
}# Load the spatial data with predicted cell fraction and save as a list
sp_data_list <- lapply(patient_list_sp, function(x) {
readr::read_rds(file.path(output_folder_name,
paste0('sp_data_cellf_',x,'.rds')))})
# Include the spots that are located inside of the tissue
for (idx in 1:length(sp_data_list)){
in_tissue_spots <- intersect(rownames(sp_data_list[[idx]]@images[[1]]@coordinates),
colnames(sp_data_list[[idx]]))
sp_data_sub <- subset(sp_data_list[[idx]], cell=in_tissue_spots)
sp_data_list[[idx]] <- sp_data_sub
}
# Merge the spatial Seurat objects
sp_data_merge <- merge(sp_data_list[[1]], sp_data_list[2:4])
sp_data_merge_mod <- sp_data_merge
# Remove the '_cellf' from the metadata columns (represents predicted cell fraction)
colnames(sp_data_merge_mod@meta.data) <- sapply(colnames(sp_data_merge_mod@meta.data),
function(x) strsplit(x, split='_cellf')[[1]])
# Fill in NA values with 0
sp_data_merge_mod@meta.data[is.na(sp_data_merge_mod@meta.data)] <- 0
# save the Seurat object
readr::write_rds(sp_data_merge_mod, file.path(output_folder_name, 'sp_data_merge_mod.rds'))# Load cell type names saved in the medatata of single-cell data
celltype_names <- c('Normal.Epithelial','T.cells','Cancer.Epithelial','PVL',
'Endothelial','CAFs','B.cells','Myeloid','Plasmablasts')
# Define cell type pairs as calculated between single-cell and spatial data
feat_pairs <- data.frame(A="Cancer.Epithelial", B=celltype_names)
# Fun Stopover analysis between cancer epithelial cells and other cell types
sp_data_merge_celltype <- STopover::topological_similarity(sp_data_merge_mod,
feat_pairs,
use_lr_db = F,
lr_db_species = "human",
min_size = 20,
fwhm = 2.5,
thres_per = 30,
assay = "Spatial",
slot = "data",
lognorm = F)
readr::write_rds(sp_data_merge_celltype, 'sp_data_merge_celltype.rds')tissue_type_ <- c("ER","ER","TNBC","TNBC")
# Load global overlap score calculated between cancer epithelial and other cell types
df <- sp_data_merge_celltype@misc %>%
dplyr::select(batch, Feat_2, J_comp) %>% dplyr::filter(Feat_2!="Cancer.Epithelial") %>%
tidyr::pivot_wider(names_from = batch, values_from = J_comp) %>%
tibble::column_to_rownames(var = "Feat_2")
# Change column names (add tissue subtype information)
colnames(df) <- paste0(colnames(df), ": ", tissue_type_)
# Fill NA values with 0
df[is.na(df)] = 0
# Color choices
grDevices::hcl.pals()
#> [1] "Pastel 1" "Dark 2" "Dark 3" "Set 2"
#> [5] "Set 3" "Warm" "Cold" "Harmonic"
#> [9] "Dynamic" "Grays" "Light Grays" "Blues 2"
#> [13] "Blues 3" "Purples 2" "Purples 3" "Reds 2"
#> [17] "Reds 3" "Greens 2" "Greens 3" "Oslo"
#> [21] "Purple-Blue" "Red-Purple" "Red-Blue" "Purple-Orange"
#> [25] "Purple-Yellow" "Blue-Yellow" "Green-Yellow" "Red-Yellow"
#> [29] "Heat" "Heat 2" "Terrain" "Terrain 2"
#> [33] "Viridis" "Plasma" "Inferno" "Rocket"
#> [37] "Mako" "Dark Mint" "Mint" "BluGrn"
#> [41] "Teal" "TealGrn" "Emrld" "BluYl"
#> [45] "ag_GrnYl" "Peach" "PinkYl" "Burg"
#> [49] "BurgYl" "RedOr" "OrYel" "Purp"
#> [53] "PurpOr" "Sunset" "Magenta" "SunsetDark"
#> [57] "ag_Sunset" "BrwnYl" "YlOrRd" "YlOrBr"
#> [61] "OrRd" "Oranges" "YlGn" "YlGnBu"
#> [65] "Reds" "RdPu" "PuRd" "Purples"
#> [69] "PuBuGn" "PuBu" "Greens" "BuGn"
#> [73] "GnBu" "BuPu" "Blues" "Lajolla"
#> [77] "Turku" "Hawaii" "Batlow" "Blue-Red"
#> [81] "Blue-Red 2" "Blue-Red 3" "Red-Green" "Purple-Green"
#> [85] "Purple-Brown" "Green-Brown" "Blue-Yellow 2" "Blue-Yellow 3"
#> [89] "Green-Orange" "Cyan-Magenta" "Tropic" "Broc"
#> [93] "Cork" "Vik" "Berlin" "Lisbon"
#> [97] "Tofino" "ArmyRose" "Earth" "Fall"
#> [101] "Geyser" "TealRose" "Temps" "PuOr"
#> [105] "RdBu" "RdGy" "PiYG" "PRGn"
#> [109] "BrBG" "RdYlBu" "RdYlGn" "Spectral"
#> [113] "Zissou 1" "Cividis" "Roma"# Draw heatmap with pheatmap
out <- pheatmap::pheatmap(t(df), cluster_rows=T, show_rownames=T,
cluster_cols=T, angle_col=45, fontsize=15,
color = grDevices::hcl.colors(100, "Inferno"),
na_col = "grey70",
breaks = seq(0, 0.3, by = 0.01*0.3),
clustering_method = 'average',
clustering_distance_rows = "correlation",
clustering_distance_cols = "correlation")tissue_type <- c("ER","ER","TNBC","TNBC")
patient_list_sp <- c('CID4290','CID4535','CID4465','CID44971')
slide_name <- paste0(patient_list_sp, ": ", tissue_type)
STopover::vis_all_connected(sp_data_merge_celltype,
feat_name_x = "Cancer.Epithelial",
feat_name_y = "Myeloid",
slide_titles = slide_name,
title_fontsize = 10,
legend_fontsize = 11,
save = T, slide_ncol = 2, dot_size = 1.6,
save_path = fig_dir,
save_name_add = paste0("_celltype"),
fig_width = 4, fig_height = 4, dpi = 150)
#> The provided object is considered a visium datasetSTopover::vis_jaccard_top_n_pair(sp_data_merge_celltype,
feat_name_x = "Cancer.Epithelial",
feat_name_y = "Myeloid",
top_n = 2,
slide_name = "CID4465",
title_fontsize = 10,
legend_fontsize = 12,
save = T, slide_ncol = 2, dot_size = 1.6,
save_path = fig_dir,
save_name_add = paste0("_celltype"),
fig_width = 9, fig_height = 9, dpi = 150)
#> The provided object is considered a visium dataset
#> Scale for fill is already present.
#> Adding another scale for fill, which will replace the existing scale.
#> Scale for fill is already present.
#> Adding another scale for fill, which will replace the existing scale.sp_data_merge_lr <- STopover::topological_similarity(sp_data_merge_mod,
use_lr_db = T,
lr_db_species = "human",
min_size = 20,
fwhm = 2.5,
thres_per = 30,
assay = "Spatial",
slot = "data",
lognorm = F)
readr::write_rds(sp_data_merge_lr, 'sp_data_merge_lr.rds')tissue_type <- list("ER","ER","TNBC","TNBC")
patient_list_sp <- c('CID4290','CID4535','CID4465','CID44971')
result_list <- STopover::vis_diff_inc_lr_pairs(sp_data_merge_lr,
ref_group=c('CID4290','CID4535'),
comp_group=c('CID4465','CID44971'),
logFC_cutoff=1, J_comp_cutoff=0.2,
go_species="human",
ontology_cat="BP",
padjust_method = "BH", padjust_cutoff=0.05,
top_n = 10, heatmap_max=5,
title_fontsize=12,
xaxis_title_fontsize=10,
yaxis_title_fontsize=10,
angle_col=45,
colorbar_palette="RdPu",
legend_loc='right',legend_fontsize=10,
save_plot=F, save_path='.',
save_name='heatmap_GO_LR_int',
fig_width=21, fig_height=10, dpi=150,
return_results=T)
#> Loading required package: clusterProfiler
#>
#> Registered S3 method overwritten by 'ggtree':
#> method from
#> identify.gg ggfun
#> clusterProfiler v4.2.2 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
#>
#> If you use clusterProfiler in published research, please cite:
#> T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
#>
#> Attaching package: 'clusterProfiler'
#> The following object is masked from 'package:stats':
#>
#> filter
#> Loading required package: org.Hs.eg.db
#> Loading required package: AnnotationDbi
#> Loading required package: stats4
#> Loading required package: BiocGenerics
#>
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:dplyr':
#>
#> combine, intersect, setdiff, union
#> The following objects are masked from 'package:stats':
#>
#> IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#>
#> anyDuplicated, append, as.data.frame, basename, cbind, colnames,
#> dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
#> grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
#> order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
#> rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
#> union, unique, unsplit, which.max, which.min
#> Loading required package: Biobase
#> Welcome to Bioconductor
#>
#> Vignettes contain introductory material; view with
#> 'browseVignettes()'. To cite Bioconductor, see
#> 'citation("Biobase")', and for packages 'citation("pkgname")'.
#> Loading required package: IRanges
#> Loading required package: S4Vectors
#>
#> Attaching package: 'S4Vectors'
#> The following object is masked from 'package:clusterProfiler':
#>
#> rename
#> The following object is masked from 'package:tidyr':
#>
#> expand
#> The following objects are masked from 'package:dplyr':
#>
#> first, rename
#> The following objects are masked from 'package:base':
#>
#> expand.grid, I, unname
#>
#> Attaching package: 'IRanges'
#> The following object is masked from 'package:clusterProfiler':
#>
#> slice
#> The following objects are masked from 'package:dplyr':
#>
#> collapse, desc, slice
#>
#> Attaching package: 'AnnotationDbi'
#> The following object is masked from 'package:clusterProfiler':
#>
#> select
#> The following object is masked from 'package:dplyr':
#>
#> select
#>
#> 'select()' returned 1:1 mapping between keys and columnsdf_diff_TNBC <- result_list[[1]]
plot_diff_TNBC <- result_list[[2]]# Extract the to upregulated LR pairs in TNBC compared to ER+ tissues
df_diff_TNBC
#> # A tibble: 196 × 6
#> # Groups: Group [1]
#> Group lr_pair Feat_1 Feat_2 Mean logFC
#> <chr> <chr> <chr> <chr> <dbl> <dbl>
#> 1 comp IL18_IL1RL2 IL18 IL1RL2 0.253 6.66
#> 2 comp INSL3_GPR84 INSL3 GPR84 0.228 4.75
#> 3 comp AGT_LRP2 AGT LRP2 0.225 4.17
#> 4 comp IL1RN_IL1RL2 IL1RN IL1RL2 0.291 3.95
#> 5 comp LAMA1_ITGA3 LAMA1 ITGA3 0.295 3.91
#> 6 comp RLN2_ADCYAP1R1 RLN2 ADCYAP1R1 0.217 3.84
#> 7 comp CDH1_EGFR CDH1 EGFR 0.262 3.82
#> 8 comp LAMA1_SDC2 LAMA1 SDC2 0.200 3.79
#> 9 comp LAMA1_ITGB4 LAMA1 ITGB4 0.227 3.79
#> 10 comp ARF1_CHRM3 ARF1 CHRM3 0.262 3.57
#> # … with 186 more rows# Plot heatmap
plot_diff_TNBCtissue_type <- c("ER","ER","TNBC","TNBC")
patient_list_sp <- c('CID4290','CID4535','CID4465','CID44971')
slide_name <- paste0(patient_list_sp, ": ", tissue_type)
STopover::vis_all_connected(sp_data_merge_lr,
feat_name_x = "IL18",
feat_name_y = "IL1RL2",
slide_titles = slide_name,
title_fontsize = 10,
legend_fontsize = 11,
save = T, slide_ncol = 2, dot_size = 1.6,
save_path = fig_dir,
save_name_add = paste0("_celltype"),
fig_width = 4, fig_height = 4, dpi = 150)
#> The provided object is considered a visium datasetSTopover::vis_jaccard_top_n_pair(sp_data_merge_lr,
feat_name_x = "IL18",
feat_name_y = "IL1RL2",
top_n = 2,
slide_name = "CID4465",
title_fontsize = 10,
legend_fontsize = 12,
save = T, slide_ncol = 2, dot_size = 1.6,
save_path = fig_dir,
save_name_add = paste0("_celltype"),
fig_width = 9, fig_height = 9, dpi = 150)
#> The provided object is considered a visium dataset
#> Scale for fill is already present.
#> Adding another scale for fill, which will replace the existing scale.
#> Scale for fill is already present.
#> Adding another scale for fill, which will replace the existing scale.