R Markdown

Running t-SNE on a random 33% sample of the training data

## Loading the dplyr package (used below for sample_frac)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Load the training set. The CSV is read without a header row, so read.csv
## names the columns V1, V2, ...; V1 is the column used as the label below.
train_full <- read.csv(
  "mnist_train.csv",
  header = FALSE,
  stringsAsFactors = TRUE
)
## Fix the RNG seed so the random 33% subsample (and any later step that
## draws from R's RNG) is the same on every run.
set.seed(42)
train <- sample_frac(train_full, 0.33)
library(Rtsne)
## Preparing the data for analysis with both t-SNE and PCA
## Keep a copy of the label column as read, before V1 is turned into a factor.
Labels <- train$V1
train$V1 <- as.factor(train$V1)
## Palette for plotting: one rainbow colour per class.
## The palette is named by factor level, so position i carries the name of
## level i. Indexing the palette with the factor selects by integer level
## code, so the names now agree with the colour each class actually receives
## (naming by unique() used order of first appearance, which did not).
colors <- rainbow(nlevels(train$V1))
names(colors) <- levels(train$V1)

## Executing the algorithm on curated data
## Executing the algorithm on curated data
## Run t-SNE once on every column except the first (the label column V1) and
## time that same run. The assignment inside system.time() stores the fit in
## `tsne`, so the reported time belongs to the embedding that is plotted and
## the expensive fit is not repeated just to measure it.
exeTimeTsne <- system.time(
  tsne <- Rtsne(
    train[, -1],
    dims = 2, perplexity = 30, verbose = TRUE, max_iter = 500
  )
)
## Read the 19800 x 50 data matrix successfully!
## Using no_dims = 2, perplexity = 30.000000, and theta = 0.500000
## Computing input similarities...
## Normalizing input...
## Building tree...
##  - point 0 of 19800
##  - point 10000 of 19800
## Done in 52.01 seconds (sparsity = 0.006232)!
## Learning embedding...
## Iteration 50: error is 105.698626 (50 iterations in 11.23 seconds)
## Iteration 100: error is 105.546000 (50 iterations in 11.57 seconds)
## Iteration 150: error is 92.480254 (50 iterations in 11.29 seconds)
## Iteration 200: error is 90.715966 (50 iterations in 10.75 seconds)
## Iteration 250: error is 90.316175 (50 iterations in 10.71 seconds)
## Iteration 300: error is 3.786182 (50 iterations in 10.27 seconds)
## Iteration 350: error is 3.382234 (50 iterations in 10.08 seconds)
## Iteration 400: error is 3.149107 (50 iterations in 10.21 seconds)
## Iteration 450: error is 2.990705 (50 iterations in 10.30 seconds)
## Iteration 500: error is 2.872066 (50 iterations in 10.49 seconds)
## Fitting performed in 106.90 seconds.

Plotting the t-SNE embedding

## Draw an empty frame scaled to the 2-D embedding (type = "n" plots no
## points), then print each observation as its class label.
plot(
  tsne$Y,
  type = "n", main = "tsne",
  xlab = "t-SNE dimension 1", ylab = "t-SNE dimension 2"
)
## `colors` is indexed with the factor itself, which selects by integer level
## code: every observation of the same class gets the same colour.
text(tsne$Y, labels = train$V1, col = colors[train$V1])