library(bigMap)
# light-plot function
m10x.plot <- function(m, m10x, size = 250000, cex = 0.05, bg = '#FFFFFF')
{
    par(mar = c(.2, .2, .2, .2), oma = c(.2, .2, .2, .2), bg = bg)
    I <- sample(seq(nrow(m$ptsne$Y)))[1:size]
    plot(m$ptsne$Y[I, 1], m$ptsne$Y[I, 2], pch = 20, cex = cex, col = m10x$CellType1_pltt[m10x$CellType1[I]], bty = 'n', xaxt = 'n', yaxt = 'n', xlab = '', ylab = '', xlim = range(m$ptsne$Y[, 1]), ylim = range(m$ptsne$Y[, 2]))
}

Load data

load('../m10x.RData')
str(m10x)
## List of 4
##  $ pca50         : num [1:1306127, 1:50] -2.17 -2.65 4.44 -3.49 -3.72 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:50] "V1" "V2" "V3" "V4" ...
##  $ CellId        : Factor w/ 1306127 levels "AAACCTGAGAAACCAT-109",..: 145 288 368 377 432 582 740 906 1113 1168 ...
##  $ CellType1     : num [1:1306127] 15 2 8 5 1 15 12 6 1 12 ...
##  $ CellType1_pltt: chr [1:39] "#FFFF00" "#1CE6FF" "#FF34FF" "#FF4A46" ...

Run ptSNE

# +++ ./start.R script:

library(bigMap)

# +++ load data
load('~/omics/10xmouse/m10x.RData')

# +++ parameters
ppx.list <- round(nrow(m10x$pca50) *c(0.0001, 0.0005, 0.001, 0.005, 0.01), 0)
threads <- 130

# +++ start MPI cluster
mpi.cl <- bdm.mpi.start(threads)
if (is.null(mpi.cl)) return

# +++ run
m.list <- lapply(ppx.list, function(ppx)
{
  # +++ betas
  m <- bdm.init(m10x$pca50, ppx = ppx, threads = threads, mpi.cl = mpi.cl)
  # +++ ptSNE
  m <- bdm.ptsne(NULL, m, theta = 0.5, threads = threads, layers = 2, mpi.cl = mpi.cl)
  # +++ hlCorrelation
  m <- bdm.hlCorr(NULL, m, threads = threads, mpi.cl = mpi.cl)
  # +++ kNP
  m <- bdm.knp(NULL, m, k.max = 250000, sampling = 0.25, threads = threads, mpi.cl = mpi.cl)
  m
}
save(m.list, file = './mlist.RData')

# +++ stop MPI cluster
bdm.mpi.stop(mpi.cl)

Run using Rsnow (MPI) (130 cores, 8Gb/core):

$ qsub -pe ompi 131 -l h_vmem=8G Rsnow ./start.R

Embedding cost/size function

nulL <- lapply(m.list, function(m) bdm.cost(m))

Output

nulL <- lapply(m.list, function(m) m10x.plot(m, m10x, size = 130000))

hl-Correlation

hlTable <- sapply(m.list, function(m) round(mean(m$hlC), 4))
hlTable <- matrix(hlTable, nrow = 1)
colnames(hlTable) <- sapply(m.list, function(m) m$ppx$ppx)
knitr::kable(hlTable, caption = 'hl-Correlation') %>%
  kable_styling(full_width = F)
hl-Correlation
130 653 1306 6531 13061 65306
0.0478 0.0581 0.0642 0.0644 0.0685 0.057

Kary-neighborhood preservation

AUC <- sapply(m.list, function(m) m$kNP$AUC)
rownames(AUC) <- c('linAUC', 'logAUC')
colnames(AUC) <- sapply(m.list, function(m) m$ppx$ppx)
knitr::kable(AUC, caption = 'k-ary neighborhood preservation') %>%
  kable_styling(full_width = F)
k-ary neighborhood preservation
130 653 1306 6531 13061 65306
linAUC 0.3531913 0.4255113 0.4383635 0.4611814 0.4669093 0.5028970
logAUC 0.1813826 0.1991758 0.2009565 0.2046254 0.2058481 0.2047087
nulL <- bdm.knp.plot(m.list)

Running Times

rTimes <- sapply(m.list, function(m) c(m$ppx$t[3], m$t$epoch, m$t$ptsne[3], (m$ppx$t[3] +m$t$ptsne[3])))
rTimes <- round(rTimes /60 , 1)
colnames(rTimes) <- sapply(m.list, function(m) m$ppx$ppx)
rownames(rTimes) <- c('betas', 'epoch', 'pt-SNE', 'total')
knitr::kable(rTimes, caption = 'Computation times (min)') %>%
  kable_styling(full_width = F)
Computation times (min)
130 653 1306 6531 13061 65306
betas 70.9 71.7 82.2 92.8 65.2 71.2
epoch 4.2 3.5 6.0 3.3 3.5 4.3
pt-SNE 244.5 201.4 189.1 191.9 206.1 251.3
total 315.4 273.0 271.2 284.6 271.3 322.5

Intel(R) Xeon(R) CPU E5-2650 v3 2.30GHz, 131 cores, 8GB/core RAM.