This vignette describes how to use mascarade
to
generate masks for clusters on 2D dimensionality reduction plots such as UMAP
or t-SNE.
The package can be installed from GitHub:
Loading example data from PBMC 3K processed with Seurat (see below for more details).
UMAP coordinates:
## UMAP_1 UMAP_2
## AAACATACAACCAC -4.232792 -4.152139
## AAACATTGAGCTAC -4.892886 10.985685
## AAACATTGATCAGC -5.508639 -7.211088
## AAACCGTGCTTCCG 11.332233 3.161727
## AAACCGTGTATGCG -7.450703 1.092022
## AAACGCACTGGTAC -3.509504 -6.087042
Cluster annotations:
## AAACATACAACCAC AAACATTGAGCTAC AAACATTGATCAGC AAACCGTGCTTCCG AAACCGTGTATGCG
## Memory CD4 T B Memory CD4 T CD14+ Mono NK
## AAACGCACTGGTAC
## Memory CD4 T
## 9 Levels: Naive CD4 T Memory CD4 T CD14+ Mono B CD8 T FCGR3A+ Mono NK ... Platelet
Expression table for several genes:
## MS4A1 GNLY CD3E CD14 FCER1A FCGR3A
## AAACATACAACCAC -0.4110536 -0.4081782 1.0157094 -0.393789 -0.1373491 -0.4507969
## AAACATTGAGCTAC 2.5965712 -0.4081782 -0.9189074 -0.393789 -0.1373491 -0.4507969
## AAACATTGATCAGC -0.4110536 0.7526607 0.8148764 -0.393789 -0.1373491 -0.4507969
## AAACCGTGCTTCCG -0.4110536 -0.4081782 -0.9189074 -0.393789 -0.1373491 1.1300704
## AAACCGTGTATGCG -0.4110536 2.3958265 -0.9189074 -0.393789 -0.1373491 -0.4507969
## AAACGCACTGGTAC -0.4110536 -0.4081782 1.1029222 -0.393789 -0.1373491 -0.4507969
## LYZ PPBP CD8A
## AAACATACAACCAC -0.11104505 -0.1416271 2.1039769
## AAACATTGAGCTAC 0.06112027 -0.1416271 -0.3537211
## AAACATTGATCAGC 0.07833934 -0.1416271 -0.3537211
## AAACCGTGCTTCCG 1.40875149 2.9255239 -0.3537211
## AAACCGTGTATGCG -0.97272094 -0.1416271 -0.3537211
## AAACGCACTGGTAC -0.06309661 -0.1416271 -0.3537211
Let’s plot this data:
# Bind the embedding coordinates, the cluster labels and the gene
# expression columns into a single table.
data <- data.table(
  exampleMascarade$dims,
  cluster = exampleMascarade$clusters,
  exampleMascarade$features
)

# Scatter plot of the embedding with points colored by cluster.
ggplot(data, aes(x = UMAP_1, y = UMAP_2)) +
  geom_point(aes(color = cluster)) +
  coord_fixed() +
  theme_classic()
Now let’s generate cluster masks:
The maskTable
is actually a table of cluster borders. A
single cluster can have multiple connected parts, and a single part
can contain multiple border lines (groups).
## UMAP_1 UMAP_2 part group cluster
## <num> <num> <char> <char> <fctr>
## 1: -3.865697 -8.906611 Memory CD4 T#1 Memory CD4 T#1#1 Memory CD4 T
## 2: -3.831308 -8.895148 Memory CD4 T#1 Memory CD4 T#1#1 Memory CD4 T
## 3: -3.819845 -8.883685 Memory CD4 T#1 Memory CD4 T#1#1 Memory CD4 T
## 4: -3.808383 -8.872222 Memory CD4 T#1 Memory CD4 T#1#1 Memory CD4 T
## 5: -3.796920 -8.860760 Memory CD4 T#1 Memory CD4 T#1#1 Memory CD4 T
## 6: -3.785457 -8.849297 Memory CD4 T#1 Memory CD4 T#1#1 Memory CD4 T
Now we can use this table to draw the borders with
geom_path
(the group
column should be used as the
group aesthetic):
# Cluster-colored points with the mask borders drawn on top; the `group`
# column keeps every border line as a separate path.
ggplot(data, aes(x = UMAP_1, y = UMAP_2)) +
  geom_point(aes(color = cluster)) +
  geom_path(
    data = maskTable,
    aes(group = group)
  ) +
  coord_fixed() +
  theme_classic()
Or we can color the borders instead of points:
# Grey points; the cluster identity is carried by the border color instead.
ggplot(data, aes(x = UMAP_1, y = UMAP_2)) +
  geom_point(color = "grey") +
  geom_path(
    data = maskTable,
    aes(group = group, color = cluster),
    linewidth = 1
  ) +
  coord_fixed() +
  theme_classic()
We can use the ggforce
package to make the borders touch
instead of overlapping:
# Draw the borders as unfilled shapes; the negative `expand` pulls each
# outline inwards by 1pt.
ggplot(data, aes(x = UMAP_1, y = UMAP_2)) +
  geom_point(color = "grey") +
  ggforce::geom_shape(
    data = maskTable,
    aes(group = group, color = cluster),
    linewidth = 1,
    fill = NA,
    expand = unit(-1, "pt")
  ) +
  coord_fixed() +
  theme_classic()
In the presence of small clusters it can help to expand the borders a bit further away from the points.
# Rebuild the mask table, passing expand = 0.02 to generateMask().
maskTable <- generateMask(
  dims = exampleMascarade$dims,
  clusters = exampleMascarade$clusters,
  expand = 0.02
)

# Same shape-based border plot as before, now using the new mask table.
ggplot(data, aes(x = UMAP_1, y = UMAP_2)) +
  geom_point(color = "grey") +
  ggforce::geom_shape(
    data = maskTable,
    aes(group = group, color = cluster),
    linewidth = 1,
    fill = NA,
    expand = unit(-1, "pt")
  ) +
  coord_fixed() +
  theme_classic()
With the help of ggforce
we can also put the labels
within the plot itself. For the best results, use the ggforce
version from https://github.com/assaron/ggforce/tree/remove-offset.
# Reusable list of layers: colored cluster borders, in-plot cluster labels,
# and wider axis padding.
fancyMask <- list(
  # Cluster borders, without a legend entry.
  ggforce::geom_shape(
    data = maskTable,
    aes(group = group, color = cluster),
    linewidth = 1,
    fill = NA,
    expand = unit(-1, "pt"),
    show.legend = FALSE
  ),
  # Hull marks with zero line width, radius and expansion, used here to
  # attach a label to each cluster.
  ggforce::geom_mark_hull(
    data = maskTable,
    aes(group = cluster, color = cluster, label = cluster),
    fill = NA,
    linewidth = 0,
    radius = 0,
    expand = 0,
    con.cap = 0,
    con.type = "straight",
    con.colour = "inherit",
    label.fontsize = 10,
    label.fontface = "plain",
    label.buffer = unit(0, "cm"),
    label.minwidth = 0,
    label.margin = margin(2, 2, 2, 2, "pt"),
    label.lineheight = 0,
    show.legend = FALSE
  ),
  # Extra padding on both axes to leave a bit more space for the labels.
  scale_x_continuous(expand = expansion(mult = 0.1)),
  scale_y_continuous(expand = expansion(mult = 0.1))
)

# Grey points with the labelled mask layers added on top.
ggplot(data, aes(x = UMAP_1, y = UMAP_2)) +
  geom_point(color = "grey") +
  fancyMask +
  coord_fixed() +
  theme_classic()
Now we can easily show association between cell types and expression of particular genes, such as GNLY being a good marker for NK cells in this dataset.
# ggnewscale allows two independent color scales in a single plot.
library(ggnewscale)

# Points are colored by GNLY expression; new_scale_color() then opens a
# second color scale for the cluster borders and labels in fancyMask.
ggplot(data, aes(x = UMAP_1, y = UMAP_2)) +
  geom_point(aes(color = GNLY), size = 0.5) +
  scale_color_gradient2(low = "#404040", high = "red") +
  new_scale_color() +
  fancyMask +
  coord_fixed() +
  theme_classic()
We can focus on a single cluster too:
# GNLY expression with only the border rows of the "NK" cluster drawn.
ggplot(data, aes(x = UMAP_1, y = UMAP_2)) +
  geom_point(aes(color = GNLY), size = 0.5) +
  scale_color_gradient2(low = "#404040", high = "red") +
  geom_path(
    data = maskTable[cluster == "NK"],
    aes(group = group)
  ) +
  coord_fixed() +
  theme_classic()
For this part of the vignette you need the Seurat
and
SeuratData
packages (the latter can be installed with
remotes::install_github('satijalab/seurat-data')
).
Loading the example dataset:
## An object of class Seurat
## 13714 features across 2700 samples within 1 assay
## Active assay: RNA (13714 features, 0 variable features)
## 2 layers present: counts, data
Let’s plot some features:
Generate masks from UMAP data:
# Build the mask table from the "umap" embedding of the Seurat object and
# its `seurat_annotations` labels.
maskTable <- generateMask(
  dims = Embeddings(pbmc3k.final, "umap"),
  clusters = pbmc3k.final$seurat_annotations
)
Now we can plot the same features with borders (there will be some warnings due to the scale change):
# One feature plot per entry of featureList, kept as a list of plots.
plots <- FeaturePlot(pbmc3k.final, features = featureList, combine = FALSE)

# Layers added to every plot: the cluster borders, plus axis padding so
# that the borders aren't cropped.
borderLayers <- list(
  geom_path(data = maskTable, aes(x = UMAP_1, y = UMAP_2, group = group)),
  scale_x_continuous(expand = expansion(mult = 0.05)),
  scale_y_continuous(expand = expansion(mult = 0.05))
)
plots <- lapply(plots, function(p) p + borderLayers)

patchwork::wrap_plots(plots)
Works with t-SNE too:
# Run t-SNE on the Seurat object and build the mask table from the
# resulting "tsne" embedding.
pbmc3k.final <- RunTSNE(pbmc3k.final)
maskTable <- generateMask(
  dims = Embeddings(pbmc3k.final, "tsne"),
  clusters = pbmc3k.final$seurat_annotations
)

# One feature plot per entry of featureList, drawn on the t-SNE reduction.
plots <- FeaturePlot(
  pbmc3k.final,
  features = featureList,
  combine = FALSE,
  reduction = "tsne"
)

# Layers added to every plot: the cluster borders, plus axis padding so
# that the borders aren't cropped.
borderLayers <- list(
  geom_path(data = maskTable, aes(x = tSNE_1, y = tSNE_2, group = group)),
  scale_x_continuous(expand = expansion(mult = 0.05)),
  scale_y_continuous(expand = expansion(mult = 0.05))
)
plots <- lapply(plots, function(p) p + borderLayers)

patchwork::wrap_plots(plots)
## R version 4.4.3 (2025-02-28)
## Platform: x86_64-pc-linux-gnu
## Running under: Debian GNU/Linux 12 (bookworm)
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.21.so; LAPACK version 3.11.0
##
## locale:
## [1] LC_CTYPE=C.utf8 LC_NUMERIC=C LC_TIME=C.utf8
## [4] LC_COLLATE=C.utf8 LC_MONETARY=C.utf8 LC_MESSAGES=C.utf8
## [7] LC_PAPER=C.utf8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=C.utf8 LC_IDENTIFICATION=C
##
## time zone: US/Central
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] stxBrain.SeuratData_0.1.2 pbmc3k.SeuratData_3.1.4
## [3] SeuratData_0.2.2.9001 Seurat_5.1.0
## [5] SeuratObject_5.0.2 sp_2.1-4
## [7] ggnewscale_0.5.0 ggforce_0.5.0
## [9] ggplot2_3.5.1 data.table_1.16.4
## [11] mascarade_0.2.0
##
## loaded via a namespace (and not attached):
## [1] pbapply_1.7-2 deldir_2.0-4 gridExtra_2.3
## [4] rlang_1.1.4 magrittr_2.0.3 RcppAnnoy_0.0.22
## [7] matrixStats_1.3.0 ggridges_0.5.6 compiler_4.4.3
## [10] spatstat.geom_3.3-4 reshape2_1.4.4 png_0.1-8
## [13] systemfonts_1.1.0 vctrs_0.6.5 stringr_1.5.1
## [16] crayon_1.5.3 pkgconfig_2.0.3 fastmap_1.2.0
## [19] labeling_0.4.3 promises_1.3.0 rmarkdown_2.27
## [22] purrr_1.0.2 xfun_0.45 cachem_1.1.0
## [25] jsonlite_1.8.8 goftest_1.2-3 highr_0.11
## [28] later_1.3.2 spatstat.utils_3.1-1 tweenr_2.0.3
## [31] irlba_2.3.5.1 parallel_4.4.3 cluster_2.1.4
## [34] R6_2.5.1 ica_1.0-3 stringi_1.8.4
## [37] bslib_0.7.0 RColorBrewer_1.1-3 spatstat.data_3.1-4
## [40] reticulate_1.38.0 parallelly_1.38.0 spatstat.univar_3.1-1
## [43] scattermore_1.2 lmtest_0.9-40 jquerylib_0.1.4
## [46] Rcpp_1.0.13-1 knitr_1.48 tensor_1.5
## [49] future.apply_1.11.2 zoo_1.8-12 sctransform_0.4.1
## [52] httpuv_1.6.15 Matrix_1.7-2 splines_4.4.3
## [55] igraph_2.0.3 tidyselect_1.2.1 rstudioapi_0.16.0
## [58] abind_1.4-5 yaml_2.3.9 codetools_0.2-19
## [61] spatstat.random_3.3-2 miniUI_0.1.1.1 spatstat.explore_3.3-3
## [64] listenv_0.9.1 plyr_1.8.9 lattice_0.22-6
## [67] tibble_3.2.1 shiny_1.8.1.1 withr_3.0.2
## [70] ROCR_1.0-11 evaluate_0.24.0 Rtsne_0.17
## [73] future_1.34.0 fastDummies_1.7.4 survival_3.8-3
## [76] polyclip_1.10-7 fitdistrplus_1.2-1 pillar_1.10.0
## [79] KernSmooth_2.23-26 plotly_4.10.4 generics_0.1.3
## [82] RcppHNSW_0.6.0 munsell_0.5.1 scales_1.3.0
## [85] xtable_1.8-4 globals_0.16.3 glue_1.8.0
## [88] lazyeval_0.2.2 tools_4.4.3 RSpectra_0.16-2
## [91] RANN_2.6.2 leiden_0.4.3.1 dotCall64_1.1-1
## [94] cowplot_1.1.3 grid_4.4.3 tidyr_1.3.1
## [97] colorspace_2.1-1 patchwork_1.2.0 nlme_3.1-167
## [100] cli_3.6.3 rappdirs_0.3.3 spatstat.sparse_3.1-0
## [103] spam_2.10-0 viridisLite_0.4.2 dplyr_1.1.4
## [106] uwot_0.2.2 gtable_0.3.6 sass_0.4.9
## [109] digest_0.6.36 progressr_0.14.0 ggrepel_0.9.5
## [112] htmlwidgets_1.6.4 farver_2.1.2 htmltools_0.5.8.1
## [115] lifecycle_1.0.4 httr_1.4.7 mime_0.12
## [118] MASS_7.3-65