10xGenomics, 1.3 Million Brain Cells from E18 Mice. ptSNE

library(bigMap)
# light-plot function
m10x.plot <- function(m, m10x, size = 250000, cex = 0.05, bg = '#FFFFFF')
{
    par(mar = c(.2, .2, .2, .2), oma = c(.2, .2, .2, .2), bg = bg)
    I <- sample(seq(nrow(m$ptsne$Y)))[1:size]
    plot(m$ptsne$Y[I, 1], m$ptsne$Y[I, 2], pch = 20, cex = cex, col = m10x$CellType1_pltt[m10x$CellType1[I]], bty = 'n', xaxt = 'n', yaxt = 'n', xlab = '', ylab = '', xlim = range(m$ptsne$Y[, 1]), ylim = range(m$ptsne$Y[, 2]))
}

Load data

load('../m10x.RData')
str(m10x)

## List of 4
##  $ pca50         : num [1:1306127, 1:50] -2.17 -2.65 4.44 -3.49 -3.72 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:50] "V1" "V2" "V3" "V4" ...
##  $ CellId        : Factor w/ 1306127 levels "AAACCTGAGAAACCAT-109",..: 145 288 368 377 432 582 740 906 1113 1168 ...
##  $ CellType1     : num [1:1306127] 15 2 8 5 1 15 12 6 1 12 ...
##  $ CellType1_pltt: chr [1:39] "#FFFF00" "#1CE6FF" "#FF34FF" "#FF4A46" ...

Run ptSNE

# +++ ./start.R script:

library(bigMap)

# +++ load data
load('~/omics/10xmouse/m10x.RData')

# +++ parameters
ppx.list <- round(nrow(m10x$pca50) *c(0.0001, 0.0005, 0.001, 0.005, 0.01), 0)
threads <- 130

# +++ start MPI cluster
mpi.cl <- bdm.mpi.start(threads)
if (is.null(mpi.cl)) return

# +++ run
m.list <- lapply(ppx.list, function(ppx)
{
  # +++ betas
  m <- bdm.init(m10x$pca50, ppx = ppx, threads = threads, mpi.cl = mpi.cl)
  # +++ ptSNE
  m <- bdm.ptsne(NULL, m, theta = 0.5, threads = threads, layers = 2, mpi.cl = mpi.cl)
  # +++ hlCorrelation
  m <- bdm.hlCorr(NULL, m, threads = threads, mpi.cl = mpi.cl)
  # +++ kNP
  m <- bdm.knp(NULL, m, k.max = 250000, sampling = 0.25, threads = threads, mpi.cl = mpi.cl)
  m
}
save(m.list, file = './mlist.RData')

# +++ stop MPI cluster
bdm.mpi.stop(mpi.cl)

Run using Rsnow (MPI) (130 cores, 8Gb/core):

$ qsub -pe ompi 131 -l h_vmem=8G Rsnow ./start.R

Embedding cost/size function

nulL <- lapply(m.list, function(m) bdm.cost(m))

Output

nulL <- lapply(m.list, function(m) m10x.plot(m, m10x, size = 130000))

hl-Correlation

hlTable <- sapply(m.list, function(m) round(mean(m$hlC), 4))
hlTable <- matrix(hlTable, nrow = 1)
colnames(hlTable) <- sapply(m.list, function(m) m$ppx$ppx)
knitr::kable(hlTable, caption = 'hl-Correlation') %>%
  kable_styling(full_width = F)

hl-Correlation
130	653	1306	6531	13061	65306
0.0478	0.0581	0.0642	0.0644	0.0685	0.057

Kary-neighborhood preservation

AUC <- sapply(m.list, function(m) m$kNP$AUC)
rownames(AUC) <- c('linAUC', 'logAUC')
colnames(AUC) <- sapply(m.list, function(m) m$ppx$ppx)
knitr::kable(AUC, caption = 'k-ary neighborhood preservation') %>%
  kable_styling(full_width = F)

k-ary neighborhood preservation
	130	653	1306	6531	13061	65306
linAUC	0.3531913	0.4255113	0.4383635	0.4611814	0.4669093	0.5028970
logAUC	0.1813826	0.1991758	0.2009565	0.2046254	0.2058481	0.2047087

nulL <- bdm.knp.plot(m.list)

Running Times

rTimes <- sapply(m.list, function(m) c(m$ppx$t[3], m$t$epoch, m$t$ptsne[3], (m$ppx$t[3] +m$t$ptsne[3])))
rTimes <- round(rTimes /60 , 1)
colnames(rTimes) <- sapply(m.list, function(m) m$ppx$ppx)
rownames(rTimes) <- c('betas', 'epoch', 'pt-SNE', 'total')
knitr::kable(rTimes, caption = 'Computation times (min)') %>%
  kable_styling(full_width = F)

Computation times (min)
	130	653	1306	6531	13061	65306
betas	70.9	71.7	82.2	92.8	65.2	71.2
epoch	4.2	3.5	6.0	3.3	3.5	4.3
pt-SNE	244.5	201.4	189.1	191.9	206.1	251.3
total	315.4	273.0	271.2	284.6	271.3	322.5

Intel(R) Xeon(R) CPU E5-2650 v3 2.30GHz, 131 cores, 8GB/core RAM.

10xGenomics, 1.3 Million Brain Cells from E18 Mice. ptSNE

J.Garriga; F.Bartumeus
Theoretical and Computational Ecology Group (CEAB-CSIC)
jgarriga@ceab.csic.es

bigMap_4.6.0, dic 2021

Load data

Run ptSNE

Embedding cost/size function

Output

hl-Correlation

Kary-neighborhood preservation

Running Times

10xGenomics, 1.3 Million Brain Cells from E18 Mice. ptSNE

J.Garriga; F.Bartumeus Theoretical and Computational Ecology Group (CEAB-CSIC) jgarriga@ceab.csic.es

bigMap_4.6.0, dic 2021

Load data

Run ptSNE

Embedding cost/size function

Output

hl-Correlation

Kary-neighborhood preservation

Running Times

J.Garriga; F.Bartumeus
Theoretical and Computational Ecology Group (CEAB-CSIC)
jgarriga@ceab.csic.es