10xGenomics, 1.3 Million Brain Cells from E18 Mice. FIt-SNE

library(bigMap)
# light-plot function
m10x.plot <- function(m, m10x, size = 250000, cex = 0.05, bg = '#FFFFFF')
{
    par(mar = c(.2, .2, .2, .2), oma = c(.2, .2, .2, .2), bg = bg)
    I <- sample(seq(nrow(m$ptsne$Y)))[1:size]
    plot(m$ptsne$Y[I, 1], m$ptsne$Y[I, 2], pch = 20, cex = cex, col = m10x$CellType1_pltt[m10x$CellType1[I]], bty = 'n', xaxt = 'n', yaxt = 'n', xlab = '', ylab = '', xlim = range(m$ptsne$Y[, 1]), ylim = range(m$ptsne$Y[, 2]))
}

Load data

load('../m10x.RData')
str(m10x)

## List of 4
##  $ pca50         : num [1:1306127, 1:50] -2.17 -2.65 4.44 -3.49 -3.72 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:50] "V1" "V2" "V3" "V4" ...
##  $ CellId        : Factor w/ 1306127 levels "AAACCTGAGAAACCAT-109",..: 145 288 368 377 432 582 740 906 1113 1168 ...
##  $ CellType1     : num [1:1306127] 15 2 8 5 1 15 12 6 1 12 ...
##  $ CellType1_pltt: chr [1:39] "#FFFF00" "#1CE6FF" "#FF34FF" "#FF4A46" ...

FIt-SNE

# +++ start.R script:
source('~/FitSNE/fast_tsne.R', chdir = T)
# +++ load data
load('../m10x.RData')
# +++ run FIt-SNE
m.list <- lapply(c(32, 65, 130, 260, 320), function(ppx) {
  # +++ create bigMap instance
  m <- list(is.distance = F, is.sparse = F, normalize = T)
  m$ppx <- list(ppx = ppx)
  m$ptsne <- list(threads = 20, layers = 1, theta = 0.5)
  # +++ FIt-SNE
  t <- system.time({
    m$ptsne$Y <- fftRtsne(m10x$pca50, perplexity = ppx, nthreads = 20)
  })
  m$t <- list(fitsne = t)
  m
})
# +++ 
save(m.list, file = './10xm_FItSNE.RData')

Run using Rsckt (20 cores 24Gb/core):

~$ qsub -pe make 20 -l h_vmem=24G Rsckt ./start.R

Compute kNP+hlC

# +++ kNPhlC.R
library(bigMap)
# +++ load data
load('../m10x.RData')
# +++ load FIt-SNE output
load('./10xm_FItSNE.RData')
# +++ start MPI cluster
threads <- 100
mpi.cl <- bdm.mpi.start(threads)
if (is.null(mpi.cl)) return
# +++ compute kNP+hlC
m.list <- lapply(m.list, function(m) {
        m <- bdm.knp(m10x$pca50, m, k.max = 250000, sampling = 0.25, threads = threads, mpi.cl = mpi.cl)
        m <- bdm.hlCorr(NULL, m, threads = threads, mpi.cl = mpi.cl)
        m
})
# +++
save(m.list, file = './10xm_FItSNE.RData')
# +++ stop MPI cluster
bdm.mpi.stop(mpi.cl)

Run using Rsnow:

~$ qsub -pe ompi 131 -l h_vmem=6G Rsnow ./kNPhlC.R

Embedding

nulL <- lapply(m.list, function(m) m10x.plot(m, m10x, size = 130000))

hl-Correlation

hlTable <- sapply(m.list, function(m) round(mean(m$hlC), 4))
hlTable <- matrix(hlTable, nrow = 1)
colnames(hlTable) <- sapply(m.list, function(m) m$ppx$ppx)
knitr::kable(hlTable, caption = 'hl-Correlation') %>%
  kable_styling(full_width = F)

hl-Correlation
32	65	130	260	320
0.0519	0.0393	0.0602	0.0586	0.0596

Kary-neighborhood preservation

AUC <- sapply(m.list, function(m) m$kNP$AUC)
rownames(AUC) <- c('linAUC', 'logAUC')
colnames(AUC) <- sapply(m.list, function(m) m$ppx$ppx)
knitr::kable(AUC, caption = 'k-ary neighborhood preservation') %>%
  kable_styling(full_width = F)

k-ary neighborhood preservation
	32	65	130	260	320
linAUC	0.4516732	0.4491650	0.4668923	0.4742270	0.4727967
logAUC	0.2638983	0.2587079	0.2551080	0.2515182	0.2495228

nulL <- bdm.knp.plot(m.list)

Running Times

rTimes <- sapply(m.list, function(m) m$t$fitsne[3])
rTimes <- matrix(round(rTimes /60 , 2), nrow = 1)
colnames(rTimes) <- sapply(m.list, function(m) m$ppx$ppx)
rownames(rTimes) <- c('FIt-SNE')
knitr::kable(rTimes, caption = 'Computation times (min)') %>%
  kable_styling(full_width = F)

Computation times (min)
	32	65	130	260	320
FIt-SNE	40.52	65.55	119.28	324.15	482.42

Run on: AMD Opteron(tm) Processor 6380, 20 cores, 24GB/core RAM.

10xGenomics, 1.3 Million Brain Cells from E18 Mice. FIt-SNE

J.Garriga; F.Bartumeus
Theoretical and Computational Ecology Group (CEAB-CSIC)
jgarriga@ceab.csic.es

nov 2021

Load data

FIt-SNE

Compute kNP+hlC

Embedding

hl-Correlation

Kary-neighborhood preservation

Running Times

10xGenomics, 1.3 Million Brain Cells from E18 Mice. FIt-SNE

J.Garriga; F.Bartumeus Theoretical and Computational Ecology Group (CEAB-CSIC) jgarriga@ceab.csic.es

nov 2021

Load data

FIt-SNE

Compute kNP+hlC

Embedding

hl-Correlation

Kary-neighborhood preservation

Running Times

J.Garriga; F.Bartumeus
Theoretical and Computational Ecology Group (CEAB-CSIC)
jgarriga@ceab.csic.es