library(bigMap)
# light-plot function
m10x.plot <- function(m, m10x, size = 250000, cex = 0.05, bg = '#FFFFFF')
{
par(mar = c(.2, .2, .2, .2), oma = c(.2, .2, .2, .2), bg = bg)
I <- sample(seq(nrow(m$ptsne$Y)))[1:size]
plot(m$ptsne$Y[I, 1], m$ptsne$Y[I, 2], pch = 20, cex = cex, col = m10x$CellType1_pltt[m10x$CellType1[I]], bty = 'n', xaxt = 'n', yaxt = 'n', xlab = '', ylab = '', xlim = range(m$ptsne$Y[, 1]), ylim = range(m$ptsne$Y[, 2]))
}
load('../m10x.RData')
str(m10x)
## List of 4
## $ pca50 : num [1:1306127, 1:50] -2.17 -2.65 4.44 -3.49 -3.72 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:50] "V1" "V2" "V3" "V4" ...
## $ CellId : Factor w/ 1306127 levels "AAACCTGAGAAACCAT-109",..: 145 288 368 377 432 582 740 906 1113 1168 ...
## $ CellType1 : num [1:1306127] 15 2 8 5 1 15 12 6 1 12 ...
## $ CellType1_pltt: chr [1:39] "#FFFF00" "#1CE6FF" "#FF34FF" "#FF4A46" ...
# +++ start.R script:
source('~/FitSNE/fast_tsne.R', chdir = T)
# +++ load data
load('../m10x.RData')
# +++ run FIt-SNE
m.list <- lapply(c(32, 65, 130, 260, 320), function(ppx) {
# +++ create bigMap instance
m <- list(is.distance = F, is.sparse = F, normalize = T)
m$ppx <- list(ppx = ppx)
m$ptsne <- list(threads = 20, layers = 1, theta = 0.5)
# +++ FIt-SNE
t <- system.time({
m$ptsne$Y <- fftRtsne(m10x$pca50, perplexity = ppx, nthreads = 20)
})
m$t <- list(fitsne = t)
m
})
# +++
save(m.list, file = './10xm_FItSNE.RData')
Run using Rsckt (20 cores 24Gb/core):
~$ qsub -pe make 20 -l h_vmem=24G Rsckt ./start.R
# +++ kNPhlC.R
library(bigMap)
# +++ load data
load('../m10x.RData')
# +++ load FIt-SNE output
load('./10xm_FItSNE.RData')
# +++ start MPI cluster
threads <- 100
mpi.cl <- bdm.mpi.start(threads)
if (is.null(mpi.cl)) return
# +++ compute kNP+hlC
m.list <- lapply(m.list, function(m) {
m <- bdm.knp(m10x$pca50, m, k.max = 250000, sampling = 0.25, threads = threads, mpi.cl = mpi.cl)
m <- bdm.hlCorr(NULL, m, threads = threads, mpi.cl = mpi.cl)
m
})
# +++
save(m.list, file = './10xm_FItSNE.RData')
# +++ stop MPI cluster
bdm.mpi.stop(mpi.cl)
Run using Rsnow:
~$ qsub -pe ompi 131 -l h_vmem=6G Rsnow ./kNPhlC.R
nulL <- lapply(m.list, function(m) m10x.plot(m, m10x, size = 130000))
hlTable <- sapply(m.list, function(m) round(mean(m$hlC), 4))
hlTable <- matrix(hlTable, nrow = 1)
colnames(hlTable) <- sapply(m.list, function(m) m$ppx$ppx)
knitr::kable(hlTable, caption = 'hl-Correlation') %>%
kable_styling(full_width = F)
| 32 | 65 | 130 | 260 | 320 |
|---|---|---|---|---|
| 0.0519 | 0.0393 | 0.0602 | 0.0586 | 0.0596 |
AUC <- sapply(m.list, function(m) m$kNP$AUC)
rownames(AUC) <- c('linAUC', 'logAUC')
colnames(AUC) <- sapply(m.list, function(m) m$ppx$ppx)
knitr::kable(AUC, caption = 'k-ary neighborhood preservation') %>%
kable_styling(full_width = F)
| 32 | 65 | 130 | 260 | 320 | |
|---|---|---|---|---|---|
| linAUC | 0.4516732 | 0.4491650 | 0.4668923 | 0.4742270 | 0.4727967 |
| logAUC | 0.2638983 | 0.2587079 | 0.2551080 | 0.2515182 | 0.2495228 |
nulL <- bdm.knp.plot(m.list)
rTimes <- sapply(m.list, function(m) m$t$fitsne[3])
rTimes <- matrix(round(rTimes /60 , 2), nrow = 1)
colnames(rTimes) <- sapply(m.list, function(m) m$ppx$ppx)
rownames(rTimes) <- c('FIt-SNE')
knitr::kable(rTimes, caption = 'Computation times (min)') %>%
kable_styling(full_width = F)
| 32 | 65 | 130 | 260 | 320 | |
|---|---|---|---|---|---|
| FIt-SNE | 40.52 | 65.55 | 119.28 | 324.15 | 482.42 |
Run on: AMD Opteron(tm) Processor 6380, 20 cores, 24GB/core RAM.