library(bigMap)

Load data

load('./tsc50.RData')
# 50 Principal components + related stuff (labels, color-palette, names, first 2 PC)
str(tsc50)
## List of 5
##  $ data : num [1:23822, 1:50] 31.4 44.5 -33 -32.1 -32.4 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:50] "V1" "V2" "V3" "V4" ...
##  $ lbls : num [1:23822] 94 73 2 8 8 8 1 11 74 66 ...
##  $ pltt : chr [1:133] "#DDACC9" "#FF88AD" "#FFB8CE" "#DD6091" ...
##  $ names: Factor w/ 133 levels "Astro Aqp4","CR Lhx5",..: 62 60 61 65 66 64 63 90 92 89 ...
##  $ pca  : num [1:23822, 1:2] -31.4 -44.5 33 32.1 32.4 ...

Run pt-SNE (20 cores)

Run the script below using a HPC node with 20 cores. With respect to the same analysis using 20 cores running times are larger but local accuracy improves.


# +++ load data
load('./tsc50.RData')
X50 <- tsc50$data

# +++ initialize for a range of perplexities
ppx <- round(nrow(X50) * c(.001, .005, .01, .05, .10, .20), 0)
m <- bdm.init(X50, dSet.name = 'Tasic50', ppx = ppx, threads = 20, dSet.labels = tsc50$lbls)

# +++ run ptSNE (5 threads, 2 layers)
m.list <- bdm.ptsne(X50, m, lRate = NULL, theta = .0, threads = 20, layers = 2)

# +++ kNP + hlC
m.list <- lapply(m.list, function(m){
        m <- bdm.knp(X50, m, threads = 20)
        m <- bdm.hlCorr(X50, m, threads = 20)
        m
})

# +++ save
save(m.list, file = './mlist_z20.RData')

Load results

load('./mlist_z20.RData')

Embedding cost/size function

nulL <- lapply(m.list, function(m) bdm.cost(m))

Embedding

nulL <- lapply(m.list, function(m) {
  m$lbls <- tsc50$lbls
  bdm.ptsne.plot(m, ptsne.cex = 0.5, class.pltt = tsc50$pltt)
})

hl-Correlation

hlTable <- sapply(m.list, function(m) mean(m$hlC))
hlTable <- matrix(round(hlTable, 4), nrow = 1)
colnames(hlTable) <- sapply(m.list, function(m) m$ppx$ppx)
rownames(hlTable) <- c('<hlC>')
knitr::kable(hlTable, caption = 'hl-Correlation') %>%
  kable_styling(full_width = F)
hl-Correlation
24 119 238 1191 2382 4764
<hlC> 0.1172 0.1273 0.4594 0.2843 0.2941 0.3725

Kary-neighborhood preservation

bdm.knp.plot(m.list)

Running Times

rTimes <- sapply(m.list, function(m) c(m$ppx$t[3], m$t$epoch, m$t$ptsne[3], (m$ppx$t[3] +m$t$ptsne[3])))
rTimes <- round(rTimes /60, 2)
colnames(rTimes) <- sapply(m.list, function(m) m$ppx$ppx)
rownames(rTimes) <- c('betas', 'epoch', 'ptsne', 'total')
knitr::kable(rTimes, caption = 'Computation times (min)') %>%
  kable_styling(full_width = F)
Computation times (min)
24 119 238 1191 2382 4764
betas 0.06 0.04 0.04 0.04 0.04 0.05
epoch 0.02 0.02 0.02 0.03 0.03 0.04
ptsne 0.75 0.76 0.80 1.04 1.31 1.80
total 0.81 0.79 0.84 1.08 1.35 1.85

Run on: Intel(R) Xeon(R) CPU E5-2650 v3 2.30GHz, 32Mb cache, 20 cores, 4GB/core RAM.