Using mascarade package

This vignette describes how to use mascarade to generate masks for clusters on 2D dimensionality reduction plots such as UMAP or t-SNE.

Package installation

The most recent version of the package can be installed from GitHub:

# Install the development version from GitHub (needs the remotes package).
remotes::install_github("alserglab/mascarade")

Loading necessary libraries

library(mascarade)
library(data.table)
library(ggplot2)
library(ggforce)

Example run

Loading example data from PBMC 3K processed with Seurat (see below for more details).

# Load the example dataset; it is used below as `exampleMascarade`.
data("exampleMascarade")

UMAP coordinates:

# First rows of the 2D embedding (one row per cell barcode).
head(exampleMascarade$dims)
##                   UMAP_1    UMAP_2
## AAACATACAACCAC -4.232792 -4.152139
## AAACATTGAGCTAC -4.892886 10.985685
## AAACATTGATCAGC -5.508639 -7.211088
## AAACCGTGCTTCCG 11.332233  3.161727
## AAACCGTGTATGCG -7.450703  1.092022
## AAACGCACTGGTAC -3.509504 -6.087042

Cluster annotations:

# First entries of the per-cell cluster labels (a factor named by barcode).
head(exampleMascarade$clusters)
## AAACATACAACCAC AAACATTGAGCTAC AAACATTGATCAGC AAACCGTGCTTCCG AAACCGTGTATGCG 
##   Memory CD4 T              B   Memory CD4 T     CD14+ Mono             NK 
## AAACGCACTGGTAC 
##   Memory CD4 T 
## 9 Levels: Naive CD4 T Memory CD4 T CD14+ Mono B CD8 T FCGR3A+ Mono NK ... Platelet

Expression table for several genes:

# First rows of the per-cell expression values for the example genes.
head(exampleMascarade$features)
##                     MS4A1       GNLY       CD3E      CD14     FCER1A     FCGR3A
## AAACATACAACCAC -0.4110536 -0.4081782  1.0157094 -0.393789 -0.1373491 -0.4507969
## AAACATTGAGCTAC  2.5965712 -0.4081782 -0.9189074 -0.393789 -0.1373491 -0.4507969
## AAACATTGATCAGC -0.4110536  0.7526607  0.8148764 -0.393789 -0.1373491 -0.4507969
## AAACCGTGCTTCCG -0.4110536 -0.4081782 -0.9189074 -0.393789 -0.1373491  1.1300704
## AAACCGTGTATGCG -0.4110536  2.3958265 -0.9189074 -0.393789 -0.1373491 -0.4507969
## AAACGCACTGGTAC -0.4110536 -0.4081782  1.1029222 -0.393789 -0.1373491 -0.4507969
##                        LYZ       PPBP       CD8A
## AAACATACAACCAC -0.11104505 -0.1416271  2.1039769
## AAACATTGAGCTAC  0.06112027 -0.1416271 -0.3537211
## AAACATTGATCAGC  0.07833934 -0.1416271 -0.3537211
## AAACCGTGCTTCCG  1.40875149  2.9255239 -0.3537211
## AAACCGTGTATGCG -0.97272094 -0.1416271 -0.3537211
## AAACGCACTGGTAC -0.06309661 -0.1416271 -0.3537211

Let’s plot this data:

# Assemble one table per cell: embedding coordinates, cluster label,
# and the expression values of the example genes.
data <- data.table(
    exampleMascarade$dims,
    cluster = exampleMascarade$clusters,
    exampleMascarade$features
)

# Scatter plot of the embedding with points colored by cluster.
ggplot(data, mapping = aes(x = UMAP_1, y = UMAP_2)) +
    geom_point(mapping = aes(color = cluster)) +
    coord_fixed() +
    theme_classic()

Now let’s generate cluster masks:

# Compute the cluster borders from the embedding and the cluster labels.
maskTable <- generateMask(
    dims = exampleMascarade$dims,
    clusters = exampleMascarade$clusters
)

The maskTable is actually a table of cluster borders. A single cluster can have multiple connected parts, and a single part can contain multiple border lines (groups).

# Inspect the first border points along with their part/group/cluster labels.
head(maskTable)
##       UMAP_1    UMAP_2           part            group      cluster
##        <num>     <num>         <char>           <char>       <fctr>
## 1: -3.865697 -8.906611 Memory CD4 T#1 Memory CD4 T#1#1 Memory CD4 T
## 2: -3.831308 -8.895148 Memory CD4 T#1 Memory CD4 T#1#1 Memory CD4 T
## 3: -3.819845 -8.883685 Memory CD4 T#1 Memory CD4 T#1#1 Memory CD4 T
## 4: -3.808383 -8.872222 Memory CD4 T#1 Memory CD4 T#1#1 Memory CD4 T
## 5: -3.796920 -8.860760 Memory CD4 T#1 Memory CD4 T#1#1 Memory CD4 T
## 6: -3.785457 -8.849297 Memory CD4 T#1 Memory CD4 T#1#1 Memory CD4 T

Now we can use this table to draw the borders with geom_path (the group column should be used as the group aesthetic):

# Points colored by cluster, with one border line per `group`.
ggplot(data, mapping = aes(x = UMAP_1, y = UMAP_2)) +
    geom_point(mapping = aes(color = cluster)) +
    geom_path(mapping = aes(group = group), data = maskTable) +
    coord_fixed() +
    theme_classic()

Or we can color the borders instead of points:

# Grey points; the cluster identity is carried by the border color instead.
ggplot(data, mapping = aes(x = UMAP_1, y = UMAP_2)) +
    geom_point(color = "grey") +
    geom_path(
        mapping = aes(group = group, color = cluster),
        data = maskTable,
        linewidth = 1
    ) +
    coord_fixed() +
    theme_classic()

We can use the ggforce package to make the borders touch instead of overlap:

# Draw the borders as unfilled shapes shrunk by 1pt.
ggplot(data, mapping = aes(x = UMAP_1, y = UMAP_2)) +
    geom_point(color = "grey") +
    ggforce::geom_shape(
        mapping = aes(group = group, color = cluster),
        data = maskTable,
        linewidth = 1,
        fill = NA,
        expand = unit(-1, "pt")
    ) +
    coord_fixed() +
    theme_classic()

In the presence of small clusters it can help to expand the borders a bit further away from the points.

# Rebuild the masks, this time passing expand=0.02 to generateMask.
maskTable <- generateMask(
    dims = exampleMascarade$dims,
    clusters = exampleMascarade$clusters,
    expand = 0.02
)

# Unfilled border shapes drawn from the rebuilt mask table.
ggplot(data, mapping = aes(x = UMAP_1, y = UMAP_2)) +
    geom_point(color = "grey") +
    ggforce::geom_shape(
        mapping = aes(group = group, color = cluster),
        data = maskTable,
        linewidth = 1,
        fill = NA,
        expand = unit(-1, "pt")
    ) +
    coord_fixed() +
    theme_classic()

With the help of the ggforce-based function geom_mark_shape we can also put the labels within the plot itself.

# Reusable list of layers: labelled cluster outlines plus widened axes.
fancyMask <- list(
    geom_mark_shape(
        data = maskTable,
        aes(group = cluster, color = cluster, label = cluster),
        fill = NA,
        linewidth = 1,
        expand = unit(-1, "pt"),
        # connector between outline and label
        con.cap = 0,
        con.type = "straight",
        con.colour = "inherit",
        # label appearance
        label.fontsize = 10,
        label.buffer = unit(0, "cm"),
        label.fontface = "plain",
        label.minwidth = 0,
        label.margin = margin(2, 2, 2, 2, "pt"),
        label.lineheight = 0,
        show.legend = FALSE
    ),
    # expanding to give a bit more space for labels
    scale_x_continuous(expand = expansion(mult = 0.1)),
    scale_y_continuous(expand = expansion(mult = 0.1))
)

# Grey points with the labelled outlines from fancyMask on top.
ggplot(data, mapping = aes(x = UMAP_1, y = UMAP_2)) +
    geom_point(color = "grey") +
    fancyMask +
    coord_fixed() +
    theme_classic()

Now we can easily show association between cell types and expression of particular genes, such as GNLY being a good marker for NK cells in this dataset.

library(ggnewscale) # for having two color scales simultaneously

ggplot(data, mapping = aes(x = UMAP_1, y = UMAP_2)) +
    # first color scale: GNLY expression of each point
    geom_point(mapping = aes(color = GNLY), size = 0.5) +
    scale_color_gradient2(low = "#404040", high = "red") +
    # start a second color scale for the layers in fancyMask
    new_scale_color() +
    fancyMask +
    coord_fixed() +
    theme_classic()

We can focus on a single cluster too:

# GNLY expression with only the borders of the "NK" cluster drawn.
ggplot(data, mapping = aes(x = UMAP_1, y = UMAP_2)) +
    geom_point(mapping = aes(color = GNLY), size = 0.5) +
    scale_color_gradient2(low = "#404040", high = "red") +
    geom_path(
        mapping = aes(group = group),
        data = maskTable[cluster == "NK"]
    ) +
    coord_fixed() +
    theme_classic()

Working with Seurat

For this part of the vignette you need the Seurat package.

library(Seurat)
## Loading required package: SeuratObject
## Loading required package: sp
## 
## Attaching package: 'SeuratObject'
## The following objects are masked from 'package:base':
## 
##     intersect, t

Let’s get the example PBMC3K dataset:

# Read the Seurat object straight from the remote .rds file (needs network access).
pbmc3k <- readRDS(url("https://alserglab.wustl.edu/files/mascarade/examples/pbmc3k_seurat5.rds"))
# Print a summary of the object.
pbmc3k
## An object of class Seurat 
## 13714 features across 2638 samples within 1 assay 
## Active assay: RNA (13714 features, 2000 variable features)
##  2 layers present: counts, data
##  2 dimensional reductions calculated: pca, umap

The same object can be obtained using the SeuratData package (which can be installed with remotes::install_github('satijalab/seurat-data')):

# Alternative way to get the PBMC 3K object, used only if the optional
# SeuratData package is installed.
# requireNamespace() loads SeuratData without attaching it, so its functions
# must be called with the SeuratData:: prefix.
if (requireNamespace("SeuratData")) {
    # Install the dataset package on first use.
    if (!SeuratData::AvailableData()["pbmc3k", "Installed"]) {
        SeuratData::InstallData("pbmc3k")
    }
    # Ask for the processed object explicitly and keep the returned value,
    # rather than relying on `pbmc3k.final` being visible on the search path.
    pbmc3k.final <- SeuratData::LoadData("pbmc3k", type = "pbmc3k.final")

    # Bring the stored object up to the current Seurat object structure.
    pbmc3k <- UpdateSeuratObject(pbmc3k.final)
    pbmc3k
}

Let’s plot some features:

# Genes to display on the embedding.
featureList <- c("MS4A1", "GNLY", "CD3E", "CD14")

# One expression panel per gene.
FeaturePlot(object = pbmc3k, features = featureList)
## Warning: The `slot` argument of `FetchData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Generate masks from UMAP data:

# Cluster borders computed from the "umap" embedding of the Seurat object,
# with the `seurat_annotations` metadata column as cluster labels.
maskTable <- generateMask(
    dims = Embeddings(pbmc3k, "umap"),
    clusters = pbmc3k$seurat_annotations
)

Now we can plot the same features with borders (there will be some warnings due to the scale change):

# combine = FALSE returns the per-feature plots as a list, so each one
# can be modified before they are put together.
plots <- FeaturePlot(object = pbmc3k, features = featureList, combine = FALSE)

# Add the cluster borders and widened axes to every plot.
plots <- lapply(plots, function(p) {
    p + list(
        geom_path(
            mapping = aes(x = UMAP_1, y = UMAP_2, group = group),
            data = maskTable
        ),
        # so that borders aren't cropped:
        scale_x_continuous(expand = expansion(mult = 0.05)),
        scale_y_continuous(expand = expansion(mult = 0.05))
    )
})

patchwork::wrap_plots(plots)

Works with t-SNE too:

# Add a t-SNE embedding to the object.
pbmc3k <- RunTSNE(pbmc3k)

# Cluster borders computed from the "tsne" embedding.
maskTable <- generateMask(
    dims = Embeddings(pbmc3k, "tsne"),
    clusters = pbmc3k$seurat_annotations
)

# Per-feature plots on the t-SNE embedding, returned as a list.
plots <- FeaturePlot(
    object = pbmc3k,
    features = featureList,
    combine = FALSE,
    reduction = "tsne"
)

# Add the cluster borders and widened axes to every plot.
plots <- lapply(plots, function(p) {
    p + list(
        geom_path(
            mapping = aes(x = tSNE_1, y = tSNE_2, group = group),
            data = maskTable
        ),
        # so that borders aren't cropped:
        scale_x_continuous(expand = expansion(mult = 0.05)),
        scale_y_continuous(expand = expansion(mult = 0.05))
    )
})

patchwork::wrap_plots(plots)

Session info

# Record the R version and package versions used to build this vignette.
sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: x86_64-pc-linux-gnu
## Running under: Debian GNU/Linux 13 (trixie)
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.21.so;  LAPACK version 3.11.0
## 
## locale:
##  [1] LC_CTYPE=C.utf8       LC_NUMERIC=C          LC_TIME=C.utf8       
##  [4] LC_COLLATE=C.utf8     LC_MONETARY=C.utf8    LC_MESSAGES=C.utf8   
##  [7] LC_PAPER=C.utf8       LC_NAME=C             LC_ADDRESS=C         
## [10] LC_TELEPHONE=C        LC_MEASUREMENT=C.utf8 LC_IDENTIFICATION=C  
## 
## time zone: US/Central
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] Seurat_5.3.0       SeuratObject_5.1.0 sp_2.2-0           ggnewscale_0.5.2  
## [5] ggforce_0.5.0      ggplot2_4.0.1      data.table_1.17.8  mascarade_0.2.999 
## 
## loaded via a namespace (and not attached):
##   [1] deldir_2.0-4           pbapply_1.7-2          gridExtra_2.3         
##   [4] rlang_1.1.6            magrittr_2.0.4         RcppAnnoy_0.0.22      
##   [7] matrixStats_1.5.0      ggridges_0.5.6         compiler_4.5.1        
##  [10] spatstat.geom_3.6-0    reshape2_1.4.4         png_0.1-8             
##  [13] systemfonts_1.3.1      vctrs_0.6.5            stringr_1.5.2         
##  [16] crayon_1.5.3           pkgconfig_2.0.3        fastmap_1.2.0         
##  [19] labeling_0.4.3         promises_1.3.3         rmarkdown_2.30        
##  [22] purrr_1.1.0            xfun_0.53              cachem_1.1.0          
##  [25] jsonlite_2.0.0         goftest_1.2-3          later_1.4.4           
##  [28] spatstat.utils_3.2-0   tweenr_2.0.3           irlba_2.3.5.1         
##  [31] parallel_4.5.1         cluster_2.1.4          R6_2.6.1              
##  [34] ica_1.0-3              stringi_1.8.7          bslib_0.9.0           
##  [37] RColorBrewer_1.1-3     spatstat.data_3.1-9    reticulate_1.42.0     
##  [40] parallelly_1.45.0      spatstat.univar_3.1-4  scattermore_1.2       
##  [43] lmtest_0.9-40          jquerylib_0.1.4        Rcpp_1.1.0            
##  [46] knitr_1.50             tensor_1.5.1           future.apply_1.20.0   
##  [49] zoo_1.8-14             sctransform_0.4.2      httpuv_1.6.16         
##  [52] Matrix_1.7-3           splines_4.5.1          igraph_2.2.1          
##  [55] tidyselect_1.2.1       rstudioapi_0.17.1      abind_1.4-8           
##  [58] yaml_2.3.10            codetools_0.2-19       spatstat.random_3.4-2 
##  [61] miniUI_0.1.2           spatstat.explore_3.5-3 listenv_0.9.1         
##  [64] plyr_1.8.9             lattice_0.22-7         tibble_3.3.0          
##  [67] shiny_1.10.0           withr_3.0.2            S7_0.2.1              
##  [70] ROCR_1.0-11            evaluate_1.0.5         Rtsne_0.17            
##  [73] future_1.58.0          fastDummies_1.7.5      survival_3.8-3        
##  [76] polyclip_1.10-7        fitdistrplus_1.2-2     pillar_1.11.1         
##  [79] KernSmooth_2.23-26     plotly_4.11.0          generics_0.1.4        
##  [82] RcppHNSW_0.6.0         scales_1.4.0           globals_0.18.0        
##  [85] xtable_1.8-4           glue_1.8.0             lazyeval_0.2.2        
##  [88] tools_4.5.1            RSpectra_0.16-2        RANN_2.6.2            
##  [91] dotCall64_1.2          cowplot_1.2.0          grid_4.5.1            
##  [94] tidyr_1.3.1            nlme_3.1-168           patchwork_1.3.1       
##  [97] cli_3.6.5              spatstat.sparse_3.1-0  spam_2.11-1           
## [100] viridisLite_0.4.2      dplyr_1.1.4            uwot_0.2.3            
## [103] gtable_0.3.6           sass_0.4.10            digest_0.6.37         
## [106] progressr_0.15.1       ggrepel_0.9.6          htmlwidgets_1.6.4     
## [109] farver_2.1.2           htmltools_0.5.8.1      lifecycle_1.0.4       
## [112] httr_1.4.7             mime_0.13              MASS_7.3-65