STopover: Analysis of CosMx SMI dataset

# Install packages
install.packages(c("Seurat","dplyr","tidyr","readr","pheatmap"))

# Install Bioconductor packages
if (!require("BiocManager", quietly = TRUE))
  install.packages("BiocManager")
BiocManager::install("clusterProfiler")
BiocManager::install("org.Mm.eg.db")
BiocManager::install("org.Hs.eg.db")

# Install CellDART
devtools::install_github("mexchy1000/CellDART", build_vignettes = T, force = T)

# Install STopover
devtools::install_github("bsungwoo/STopover", build_vignettes = T, force = T)
library(STopover)
library(Seurat)
#> Attaching SeuratObject
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidyr)
library(readr)
library(pheatmap)
# Make output folder
output_folder_name <- 'Results'
if (!file.exists(output_folder_name)){
  dir.create(output_folder_name)
}

fig_dir <- file.path(output_folder_name, 'figures')
if (!file.exists(fig_dir)){
  dir.create(fig_dir)
}

1. Load and preprocess single-cell dataset

Read single-cell dataset (GSE131907) with tLung sample only
Cell type annotation in metadata as ‘Cell_subtype’
# Install and load environment
STopover::install_load_env('STopover')
# Import anndata
sc <- reticulate::import('scanpy', convert = FALSE)
pd <- reticulate::import('pandas', convert = FALSE)
# Import STopover
STopover <- reticulate::import('STopover', convert = FALSE)

# Load single-cell anndata that contains only samples from 'tLung'
sc_adata <- sc$read_h5ad('sc_lung_cancer.h5ad')
Modify the cell subtype names and save as .h5ad
# Extract metadata from anndata
metadata <- data.frame(reticulate::py_to_r(sc_adata$obs))

# Reassign Undetermined and NaN in 'Cell_subtype' into _ns subtype
metadata[['Cell_subtype']] <- as.character(metadata[['Cell_subtype']])
metadata[['Cell_type']] <- as.character(metadata[['Cell_type']])
metadata <- metadata %>%
  dplyr::mutate(Cell_subtype = ifelse((Cell_subtype=="Undetermined"|is.na(Cell_subtype)),
                                      paste0(Cell_type, "_ns"), Cell_subtype))

# Change the 'Cell_subtype' column to factor
metadata[['Cell_subtype']] <- factor(metadata[['Cell_subtype']])

# Update the metadata in original anndata
sc_adata$obs <- reticulate::r_to_py(metadata[reticulate::py_to_r(sc_adata$obs_names$tolist()), ])

# Save single-cell Seurat object
sc_adata$write('sc_lung_cancer_mod.h5ad')

2. Load and preprocess spatial dataset

Download CosMx SMI dataset (Lung 5-1: Data Files)
https://nanostring.com/resources/smi-ffpe-dataset-lung5-rep1-data/
https://nanostring-public-share.s3.us-west-2.amazonaws.com/SMI-Compressed/SMI-ReadMe.html
# Path for the data files
data_path = './Lung5_Rep1/Lung5_Rep1-Flat_files_and_images'

sp_data_list <- STopover::preprocess_cosmx(sp_load_path=data_path, 
                                           # sc_object can be Seurat object, anndata object, 
                                           # or path to the single-cell .h5ad file
                                           # below example is for path to '*.h5ad' file
                                           sc_object='sc_lung_cancer_mod.h5ad',
                                           conda.env.name='STopover',
                                           sc_celltype_colname = 'Cell_subtype', 
                                           sc_norm_total=1e3,
                                           tx_file_name = 'Lung5_Rep1_tx_file.csv',
                                           cell_exprmat_file_name='Lung5_Rep1_exprMat_file.csv',
                                           cell_metadata_file_name='Lung5_Rep1_metadata_file.csv',
                                           fov_colname = 'fov', cell_id_colname='cell_ID',
                                           tx_xcoord_colname='x_global_px', 
                                           tx_ycoord_colname='y_global_px',
                                           transcript_colname='target',
                                           meta_xcoord_colname='CenterX_global_px',
                                           meta_ycoord_colname='CenterY_global_px',
                                           x_bins=100, y_bins=100)
# Save preprocessed spatial data
readr::write_rds(sp_data_list, 'sp_lung_cancer_list.rds')
sp_data_grid <- sp_data_list[[1]]
sp_data_cell <- sp_data_list[[2]]

3. Visualize spatial cell annotation

# Remove cell types that end with '_ns'
cell_types_wo_ns <- grep(levels(sp_data_cell@meta.data[['celltype']]), pattern='_ns', value=T, invert=T)
Seurat::Idents(sp_data_cell) <- "celltype"
sp_data_cell_sub = subset(sp_data_cell, idents = cell_types_wo_ns)
# Visualize annotation of cells
Seurat::SpatialDimPlot(sp_data_cell_sub, pt.size.factor = 1)

4. STopover analysis: extract cell-cell colocalization patterns

# Load cell type names saved in the medatata of single-cell data
celltype_names <- colnames(sp_data_grid@meta.data)[5:51]

# Define cell type pairs as calculated between single-cell and spatial data
feat_pairs <- data.frame(A=c(), B=c())
for (celltype_a in celltype_names){
  for (celltype_b in celltype_names){
    feat_pairs <- rbind(feat_pairs, data.frame(A=celltype_a,B=celltype_b))
  }
}

# Fun Stopover analysis between cancer epithelial cells and other cell types
sp_data_grid_celltype <- STopover::topological_similarity(sp_data_grid, 
                                                          feat_pairs,
                                                          use_lr_db = F,
                                                          lr_db_species = "human",
                                                          min_size = 20,
                                                          fwhm = 2.5,
                                                          thres_per = 30,
                                                          assay = "Spatial",
                                                          slot = "data",
                                                          lognorm = F)
readr::write_rds(sp_data_grid_celltype, 'sp_data_merge_celltype.rds')

5. Draw cell-cell colocalization heatmap

# Load global overlap score calculated between cancer epithelial and other cell types
df <- sp_data_grid_celltype@misc %>%
  dplyr::select(Feat_1, Feat_2, J_comp) %>%
  tidyr::pivot_wider(names_from = Feat_1, values_from = J_comp) %>%
  tibble::column_to_rownames(var = "Feat_2")
# Fill NA values with 0
df[is.na(df)] = 0

# Color choices
grDevices::hcl.pals()
#>   [1] "Pastel 1"      "Dark 2"        "Dark 3"        "Set 2"        
#>   [5] "Set 3"         "Warm"          "Cold"          "Harmonic"     
#>   [9] "Dynamic"       "Grays"         "Light Grays"   "Blues 2"      
#>  [13] "Blues 3"       "Purples 2"     "Purples 3"     "Reds 2"       
#>  [17] "Reds 3"        "Greens 2"      "Greens 3"      "Oslo"         
#>  [21] "Purple-Blue"   "Red-Purple"    "Red-Blue"      "Purple-Orange"
#>  [25] "Purple-Yellow" "Blue-Yellow"   "Green-Yellow"  "Red-Yellow"   
#>  [29] "Heat"          "Heat 2"        "Terrain"       "Terrain 2"    
#>  [33] "Viridis"       "Plasma"        "Inferno"       "Rocket"       
#>  [37] "Mako"          "Dark Mint"     "Mint"          "BluGrn"       
#>  [41] "Teal"          "TealGrn"       "Emrld"         "BluYl"        
#>  [45] "ag_GrnYl"      "Peach"         "PinkYl"        "Burg"         
#>  [49] "BurgYl"        "RedOr"         "OrYel"         "Purp"         
#>  [53] "PurpOr"        "Sunset"        "Magenta"       "SunsetDark"   
#>  [57] "ag_Sunset"     "BrwnYl"        "YlOrRd"        "YlOrBr"       
#>  [61] "OrRd"          "Oranges"       "YlGn"          "YlGnBu"       
#>  [65] "Reds"          "RdPu"          "PuRd"          "Purples"      
#>  [69] "PuBuGn"        "PuBu"          "Greens"        "BuGn"         
#>  [73] "GnBu"          "BuPu"          "Blues"         "Lajolla"      
#>  [77] "Turku"         "Hawaii"        "Batlow"        "Blue-Red"     
#>  [81] "Blue-Red 2"    "Blue-Red 3"    "Red-Green"     "Purple-Green" 
#>  [85] "Purple-Brown"  "Green-Brown"   "Blue-Yellow 2" "Blue-Yellow 3"
#>  [89] "Green-Orange"  "Cyan-Magenta"  "Tropic"        "Broc"         
#>  [93] "Cork"          "Vik"           "Berlin"        "Lisbon"       
#>  [97] "Tofino"        "ArmyRose"      "Earth"         "Fall"         
#> [101] "Geyser"        "TealRose"      "Temps"         "PuOr"         
#> [105] "RdBu"          "RdGy"          "PiYG"          "PRGn"         
#> [109] "BrBG"          "RdYlBu"        "RdYlGn"        "Spectral"     
#> [113] "Zissou 1"      "Cividis"       "Roma"
# Draw heatmap with pheatmap
out <- pheatmap::pheatmap(t(df), cluster_rows=T, show_rownames=T,
                          cluster_cols=T, angle_col=45, fontsize=5,
                          color = grDevices::hcl.colors(100, "Inferno"), 
                          na_col = "grey70",
                          breaks = seq(0, 0.3, by = 0.01*0.3),
                          clustering_method = 'average',
                          clustering_distance_rows = "correlation",
                          clustering_distance_cols = "correlation")

6. Visualize colocalized domain bewteen the cell type pairs

Visualize spatial distribution of cell types

p1 <- STopover::vis_spatial_cosmx(sp_data_grid_celltype, "tS2", color_cont = "RdPu")
p2 <- STopover::vis_spatial_cosmx(sp_data_grid_celltype, "MAST", color_cont = "RdPu")
p1 + p2

Visualize all interacting locations for the cell type pair

STopover::vis_all_connected(sp_data_grid_celltype, 
                            feat_name_x = "tS2", 
                            feat_name_y = "MAST",
                            slide_titles = "Lung CosMx SMI",
                            title_fontsize = 12,
                            legend_fontsize = 10,
                            save = F,
                            save_path = fig_dir,
                            save_name_add = paste0("_celltype"), return_plot = T,
                            fig_width = 5, fig_height = 5, dpi = 150)
#> The provided object is considered a cosmx dataset

Visualize the top 2 interaction locations for the cell type pair

STopover::vis_jaccard_top_n_pair(sp_data_grid_celltype, 
                                 feat_name_x = "tS2", 
                                 feat_name_y = "MAST",
                                 top_n = 2,
                                 slide_title = "Lung CosMx SMI",
                                 title_fontsize = 12,
                                 legend_fontsize = 10,
                                 save = T,
                                 save_path = fig_dir,
                                 save_name_add = paste0("_celltype"),
                                 fig_width = 4, fig_height = 4, dpi = 150)
#> The provided object is considered a cosmx dataset
#> Scale for fill is already present.
#> Adding another scale for fill, which will replace the existing scale.
#> Scale for fill is already present.
#> Adding another scale for fill, which will replace the existing scale.

8. STopover analysis: extract dominant spatial LR interaction

sp_data_merge_lr <- STopover::topological_similarity(sp_data_grid,
                                                     use_lr_db = T,
                                                     lr_db_species = "human",
                                                     min_size = 20,
                                                     fwhm = 2.5,
                                                     thres_per = 30,
                                                     assay = "Spatial",
                                                     slot = "data",
                                                     lognorm = F)
readr::write_rds(sp_data_merge_lr, '/home/nmadmin/DATA2/STopover_test/Results/data_analysis/sp_data_merge_lr.rds')

Visualize spatial gene expression pattern

p1 <- STopover::vis_spatial_cosmx(sp_data_merge_lr, "CD274", color_cont = "RdPu")
p2 <- STopover::vis_spatial_cosmx(sp_data_merge_lr, "PDCD1", color_cont = "RdPu")
p1 + p2

Visualize all interacting locations for the top LR pair

STopover::vis_all_connected(sp_data_merge_lr, 
                            feat_name_x = "CD274", 
                            feat_name_y = "PDCD1",
                            slide_titles = "Lung CosMx SMI",
                            title_fontsize = 12,
                            legend_fontsize = 10,
                            save = T, slide_ncol = 2,
                            save_path = fig_dir,
                            save_name_add = paste0("_celltype"),
                            fig_width = 4, fig_height = 4, dpi = 150)
#> The provided object is considered a cosmx dataset

Visualize the top 2 interaction locations for the top LR pair

STopover::vis_jaccard_top_n_pair(sp_data_merge_lr, 
                                 feat_name_x = "CD274", 
                                 feat_name_y = "PDCD1",
                                 top_n = 2,
                                 slide_title = "Lung CosMx SMI",
                                 title_fontsize = 10,
                                 legend_fontsize = 12,
                                 save = T, slide_ncol = 2, dot_size = 1.6,
                                 save_path = fig_dir,
                                 save_name_add = paste0("_celltype"),
                                 fig_width = 4, fig_height = 4, dpi = 150)
#> The provided object is considered a cosmx dataset
#> Scale for fill is already present.
#> Adding another scale for fill, which will replace the existing scale.
#> Scale for fill is already present.
#> Adding another scale for fill, which will replace the existing scale.

9. Estimate cell-type specific L-R interaction

sp_celltype_spec_lr <- STopover::topological_similarity_celltype_pair(sp_data_grid, 
                                                                      celltype_x='tS2',
                                                                      celltype_y='Cytotoxic.CD8..T',
                                                                      use_lr_db=T,
                                                                      lr_db_species='human',
                                                                      min_size=20,
                                                                      fwhm=2.5,
                                                                      thres_per=30,
                                                                      celltype_colname='Cell_subtype',
                                                                      conda.env.name='STopover',
                                                                      assay='Spatial',
                                                                      slot='data',
                                                                      lognorm=F,
                                                                      return_result_df=T)
readr::write_rds(sp_celltype_spec_lr, 'sp_celltype_spec_lr.rds')

Visualize tS2-Cytotoxic CD8+ specific spatial gene expression pattern

p1 <- STopover::vis_spatial_cosmx(sp_celltype_spec_lr, "tS2-CD274", 
                                  plot_title = "tS2: CD274", color_cont = "RdPu")
p2 <- STopover::vis_spatial_cosmx(sp_celltype_spec_lr, "Cytotoxic.CD8..T-CD274", 
                                  plot_title = "Cytotoxic CD8+ T: CD274", color_cont = "RdPu")
p1 + p2

Visualize all interacting locations for the tS2-Cytotoxic CD8+ specific top LR pair

STopover::vis_all_connected(sp_celltype_spec_lr, 
                            feat_name_x = "CD274", 
                            feat_name_y = "PDCD1",
                            celltype_x='tS2',
                            celltype_y='Cytotoxic.CD8..T',
                            slide_titles = "Lung CosMx SMI",
                            title_fontsize = 12,
                            legend_fontsize = 10,
                            save = T, slide_ncol = 2,
                            save_path = fig_dir,
                            save_name_add = paste0("_celltype"),
                            fig_width = 4, fig_height = 4, dpi = 150)
#> The provided object is considered a cosmx dataset

Visualize the top 2 interaction locations for the tS2-Cytotoxic CD8+ specific top LR pair

STopover::vis_jaccard_top_n_pair(sp_celltype_spec_lr, 
                                 feat_name_x = "CD274", 
                                 feat_name_y = "PDCD1",
                                 celltype_x='tS2',
                                 celltype_y='Cytotoxic.CD8..T',
                                 top_n = 2,
                                 slide_title = "Lung CosMx SMI",
                                 title_fontsize = 8,
                                 legend_fontsize = 12,
                                 save = T, slide_ncol = 2, dot_size = 1.6,
                                 save_path = fig_dir,
                                 save_name_add = paste0("_celltype"),
                                 fig_width = 4, fig_height = 4, dpi = 150)
#> The provided object is considered a cosmx dataset
#> Scale for fill is already present.
#> Adding another scale for fill, which will replace the existing scale.
#> Scale for fill is already present.
#> Adding another scale for fill, which will replace the existing scale.