library(bigMap)
load('./tsc50.RData')
# 50 Principal components + related stuff (labels, color-palette, names, first 2 PC)
str(tsc50)
## List of 5
## $ data : num [1:23822, 1:50] 31.4 44.5 -33 -32.1 -32.4 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:50] "V1" "V2" "V3" "V4" ...
## $ lbls : num [1:23822] 94 73 2 8 8 8 1 11 74 66 ...
## $ pltt : chr [1:133] "#DDACC9" "#FF88AD" "#FFB8CE" "#DD6091" ...
## $ names: Factor w/ 133 levels "Astro Aqp4","CR Lhx5",..: 62 60 61 65 66 64 63 90 92 89 ...
## $ pca : num [1:23822, 1:2] -31.4 -44.5 33 32.1 32.4 ...
Run the script below using a HPC node with 20 cores. With respect to the same analysis using 20 cores running times are larger but local accuracy improves.
# +++ load data
load('./tsc50.RData')
X50 <- tsc50$data
# +++ initialize for a range of perplexities
ppx <- round(nrow(X50) * c(.001, .005, .01, .05, .10, .20), 0)
m <- bdm.init(X50, dSet.name = 'Tasic50', ppx = ppx, threads = 20, dSet.labels = tsc50$lbls)
# +++ run ptSNE (5 threads, 2 layers)
m.list <- bdm.ptsne(X50, m, lRate = NULL, theta = .0, threads = 20, layers = 2)
# +++ kNP + hlC
m.list <- lapply(m.list, function(m){
m <- bdm.knp(X50, m, threads = 20)
m <- bdm.hlCorr(X50, m, threads = 20)
m
})
# +++ save
save(m.list, file = './mlist_z20.RData')
load('./mlist_z20.RData')
nulL <- lapply(m.list, function(m) bdm.cost(m))
nulL <- lapply(m.list, function(m) {
m$lbls <- tsc50$lbls
bdm.ptsne.plot(m, ptsne.cex = 0.5, class.pltt = tsc50$pltt)
})
hlTable <- sapply(m.list, function(m) mean(m$hlC))
hlTable <- matrix(round(hlTable, 4), nrow = 1)
colnames(hlTable) <- sapply(m.list, function(m) m$ppx$ppx)
rownames(hlTable) <- c('<hlC>')
knitr::kable(hlTable, caption = 'hl-Correlation') %>%
kable_styling(full_width = F)
| 24 | 119 | 238 | 1191 | 2382 | 4764 | |
|---|---|---|---|---|---|---|
| <hlC> | 0.1172 | 0.1273 | 0.4594 | 0.2843 | 0.2941 | 0.3725 |
bdm.knp.plot(m.list)
rTimes <- sapply(m.list, function(m) c(m$ppx$t[3], m$t$epoch, m$t$ptsne[3], (m$ppx$t[3] +m$t$ptsne[3])))
rTimes <- round(rTimes /60, 2)
colnames(rTimes) <- sapply(m.list, function(m) m$ppx$ppx)
rownames(rTimes) <- c('betas', 'epoch', 'ptsne', 'total')
knitr::kable(rTimes, caption = 'Computation times (min)') %>%
kable_styling(full_width = F)
| 24 | 119 | 238 | 1191 | 2382 | 4764 | |
|---|---|---|---|---|---|---|
| betas | 0.06 | 0.04 | 0.04 | 0.04 | 0.04 | 0.05 |
| epoch | 0.02 | 0.02 | 0.02 | 0.03 | 0.03 | 0.04 |
| ptsne | 0.75 | 0.76 | 0.80 | 1.04 | 1.31 | 1.80 |
| total | 0.81 | 0.79 | 0.84 | 1.08 | 1.35 | 1.85 |
Run on: Intel(R) Xeon(R) CPU E5-2650 v3 2.30GHz, 32Mb cache, 20 cores, 4GB/core RAM.