Visualizations of SNF Clusters

van der Maaten & Hinton (2008): t-Distributed Stochastic Neighbor Embedding (t-SNE) converts high-dimensional Euclidean distances between data points into conditional probabilities that represent similarities. It aims to alleviate the difficulty of optimizing the cost function in SNE. The t-SNE cost function uses a Student t-distribution rather than a Gaussian to compute the similarity between two points in the low-dimensional space.

A feature of t-SNE is a tuneable parameter, “perplexity,” which says (loosely) how to balance attention between local and global aspects of your data. The parameter is, in a sense, a guess about the number of close neighbors each point has. The perplexity value has a complex effect on the resulting pictures. The original paper says, “The performance of SNE is fairly robust to changes in the perplexity, and typical values are between 5 and 50.” But the story is more nuanced than that. Getting the most from t-SNE may mean analyzing multiple plots with different perplexities. Considerations: (1) the perplexity really should be smaller than the number of points; (2) the cluster sizes don’t mean anything, and the distances between clusters also might not mean anything; (3) random noise doesn’t always look random.

Top 2 features by NMI for each data type

Number of iterations = 20000

3 Dimensions

Perplexity = 10

# Fit a 3-dimensional t-SNE embedding (perplexity 10) on every column of
# `train` except the first, then write each row's label on an empty canvas,
# coloured through the `colors` lookup.
# NOTE(review): base plot()/text() appear to use only the first two columns
# of a matrix, so the third embedding dimension is likely not drawn - confirm.
tsne <- Rtsne(
  train[, -1],
  dims = 3,
  perplexity = 10,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Perplexity = 20

# Fit a 3-dimensional t-SNE embedding (perplexity 20) on every column of
# `train` except the first, then write each row's label on an empty canvas,
# coloured through the `colors` lookup.
# NOTE(review): base plot()/text() appear to use only the first two columns
# of a matrix, so the third embedding dimension is likely not drawn - confirm.
tsne <- Rtsne(
  train[, -1],
  dims = 3,
  perplexity = 20,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Perplexity = 30

# Fit a 3-dimensional t-SNE embedding (perplexity 30) on every column of
# `train` except the first, then write each row's label on an empty canvas,
# coloured through the `colors` lookup.
# NOTE(review): base plot()/text() appear to use only the first two columns
# of a matrix, so the third embedding dimension is likely not drawn - confirm.
tsne <- Rtsne(
  train[, -1],
  dims = 3,
  perplexity = 30,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Perplexity = 40

# Fit a 3-dimensional t-SNE embedding (perplexity 40) on every column of
# `train` except the first, then write each row's label on an empty canvas,
# coloured through the `colors` lookup.
# NOTE(review): base plot()/text() appear to use only the first two columns
# of a matrix, so the third embedding dimension is likely not drawn - confirm.
tsne <- Rtsne(
  train[, -1],
  dims = 3,
  perplexity = 40,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Perplexity = 50

# Fit a 3-dimensional t-SNE embedding (perplexity 50) on every column of
# `train` except the first, then write each row's label on an empty canvas,
# coloured through the `colors` lookup.
# NOTE(review): base plot()/text() appear to use only the first two columns
# of a matrix, so the third embedding dimension is likely not drawn - confirm.
tsne <- Rtsne(
  train[, -1],
  dims = 3,
  perplexity = 50,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Changing up dimensions

Dimensions = 1, Perplexity = 30

# Fit a 1-dimensional t-SNE embedding (perplexity 30) on every column of
# `train` except the first, then write each row's label on an empty canvas,
# coloured through the `colors` lookup.
# NOTE(review): for a one-column matrix, base plot()/text() presumably place
# the embedding value on the y-axis against the row index - confirm.
tsne <- Rtsne(
  train[, -1],
  dims = 1,
  perplexity = 30,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Dimensions = 2, Perplexity = 30

# Fit a 2-dimensional t-SNE embedding (perplexity 30) on every column of
# `train` except the first, then write each row's label at its embedded
# position on an empty canvas, coloured through the `colors` lookup.
tsne <- Rtsne(
  train[, -1],
  dims = 2,
  perplexity = 30,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Top 15 features by NMI (cortical thickness, inattention, hyperactivity)

Number of iterations = 20000

3 Dimensions

Perplexity = 10

# Fit a 3-dimensional t-SNE embedding (perplexity 10) on every column of
# `train` except the first, then write each row's label on an empty canvas,
# coloured through the `colors` lookup.
# NOTE(review): base plot()/text() appear to use only the first two columns
# of a matrix, so the third embedding dimension is likely not drawn - confirm.
tsne <- Rtsne(
  train[, -1],
  dims = 3,
  perplexity = 10,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Perplexity = 20

# Fit a 3-dimensional t-SNE embedding (perplexity 20) on every column of
# `train` except the first, then write each row's label on an empty canvas,
# coloured through the `colors` lookup.
# NOTE(review): base plot()/text() appear to use only the first two columns
# of a matrix, so the third embedding dimension is likely not drawn - confirm.
tsne <- Rtsne(
  train[, -1],
  dims = 3,
  perplexity = 20,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Perplexity = 30

# Fit a 3-dimensional t-SNE embedding (perplexity 30) on every column of
# `train` except the first, then write each row's label on an empty canvas,
# coloured through the `colors` lookup.
# NOTE(review): base plot()/text() appear to use only the first two columns
# of a matrix, so the third embedding dimension is likely not drawn - confirm.
tsne <- Rtsne(
  train[, -1],
  dims = 3,
  perplexity = 30,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Perplexity = 40

# Fit a 3-dimensional t-SNE embedding (perplexity 40) on every column of
# `train` except the first, then write each row's label on an empty canvas,
# coloured through the `colors` lookup.
# NOTE(review): base plot()/text() appear to use only the first two columns
# of a matrix, so the third embedding dimension is likely not drawn - confirm.
tsne <- Rtsne(
  train[, -1],
  dims = 3,
  perplexity = 40,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Perplexity = 50

# Fit a 3-dimensional t-SNE embedding (perplexity 50) on every column of
# `train` except the first, then write each row's label on an empty canvas,
# coloured through the `colors` lookup.
# NOTE(review): base plot()/text() appear to use only the first two columns
# of a matrix, so the third embedding dimension is likely not drawn - confirm.
tsne <- Rtsne(
  train[, -1],
  dims = 3,
  perplexity = 50,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Changing up dimensions

Dimensions = 1, Perplexity = 30

# Fit a 1-dimensional t-SNE embedding (perplexity 30) on every column of
# `train` except the first, then write each row's label on an empty canvas,
# coloured through the `colors` lookup.
# NOTE(review): for a one-column matrix, base plot()/text() presumably place
# the embedding value on the y-axis against the row index - confirm.
tsne <- Rtsne(
  train[, -1],
  dims = 1,
  perplexity = 30,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Dimensions = 2, Perplexity = 30

# Fit a 2-dimensional t-SNE embedding (perplexity 30) on every column of
# `train` except the first, then write each row's label at its embedded
# position on an empty canvas, coloured through the `colors` lookup.
tsne <- Rtsne(
  train[, -1],
  dims = 2,
  perplexity = 30,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Diagnosis labels with the top 2 NMI features of each type

Number of iterations = 20000

3 Dimensions

Perplexity = 10

# Fit a 3-dimensional t-SNE embedding (perplexity 10) on every column of
# `train` except the first, then write each row's label on an empty canvas,
# coloured through the `colors` lookup.
# NOTE(review): base plot()/text() appear to use only the first two columns
# of a matrix, so the third embedding dimension is likely not drawn - confirm.
tsne <- Rtsne(
  train[, -1],
  dims = 3,
  perplexity = 10,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Perplexity = 20

# Fit a 3-dimensional t-SNE embedding (perplexity 20) on every column of
# `train` except the first, then write each row's label on an empty canvas,
# coloured through the `colors` lookup.
# NOTE(review): base plot()/text() appear to use only the first two columns
# of a matrix, so the third embedding dimension is likely not drawn - confirm.
tsne <- Rtsne(
  train[, -1],
  dims = 3,
  perplexity = 20,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Perplexity = 30

# Fit a 3-dimensional t-SNE embedding (perplexity 30) on every column of
# `train` except the first, then write each row's label on an empty canvas,
# coloured through the `colors` lookup.
# NOTE(review): base plot()/text() appear to use only the first two columns
# of a matrix, so the third embedding dimension is likely not drawn - confirm.
tsne <- Rtsne(
  train[, -1],
  dims = 3,
  perplexity = 30,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Perplexity = 40

# Fit a 3-dimensional t-SNE embedding (perplexity 40) on every column of
# `train` except the first, then write each row's label on an empty canvas,
# coloured through the `colors` lookup.
# NOTE(review): base plot()/text() appear to use only the first two columns
# of a matrix, so the third embedding dimension is likely not drawn - confirm.
tsne <- Rtsne(
  train[, -1],
  dims = 3,
  perplexity = 40,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Perplexity = 50

# Fit a 3-dimensional t-SNE embedding (perplexity 50) on every column of
# `train` except the first, then write each row's label on an empty canvas,
# coloured through the `colors` lookup.
# NOTE(review): base plot()/text() appear to use only the first two columns
# of a matrix, so the third embedding dimension is likely not drawn - confirm.
tsne <- Rtsne(
  train[, -1],
  dims = 3,
  perplexity = 50,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Changing up dimensions

Dimensions = 1, Perplexity = 30

# Fit a 1-dimensional t-SNE embedding (perplexity 30) on every column of
# `train` except the first, then write each row's label on an empty canvas,
# coloured through the `colors` lookup.
# NOTE(review): for a one-column matrix, base plot()/text() presumably place
# the embedding value on the y-axis against the row index - confirm.
tsne <- Rtsne(
  train[, -1],
  dims = 1,
  perplexity = 30,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Dimensions = 2, Perplexity = 30

# Fit a 2-dimensional t-SNE embedding (perplexity 30) on every column of
# `train` except the first, then write each row's label at its embedded
# position on an empty canvas, coloured through the `colors` lookup.
tsne <- Rtsne(
  train[, -1],
  dims = 2,
  perplexity = 30,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Diagnosis labels with the top 15 NMI features

Number of iterations = 20000

3 Dimensions

Perplexity = 10

# Fit a 3-dimensional t-SNE embedding (perplexity 10) on every column of
# `train` except the first, then write each row's label on an empty canvas,
# coloured through the `colors` lookup.
# NOTE(review): base plot()/text() appear to use only the first two columns
# of a matrix, so the third embedding dimension is likely not drawn - confirm.
tsne <- Rtsne(
  train[, -1],
  dims = 3,
  perplexity = 10,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Perplexity = 20

# Fit a 3-dimensional t-SNE embedding (perplexity 20) on every column of
# `train` except the first, then write each row's label on an empty canvas,
# coloured through the `colors` lookup.
# NOTE(review): base plot()/text() appear to use only the first two columns
# of a matrix, so the third embedding dimension is likely not drawn - confirm.
tsne <- Rtsne(
  train[, -1],
  dims = 3,
  perplexity = 20,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Perplexity = 30

# Fit a 3-dimensional t-SNE embedding (perplexity 30) on every column of
# `train` except the first, then write each row's label on an empty canvas,
# coloured through the `colors` lookup.
# NOTE(review): base plot()/text() appear to use only the first two columns
# of a matrix, so the third embedding dimension is likely not drawn - confirm.
tsne <- Rtsne(
  train[, -1],
  dims = 3,
  perplexity = 30,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Perplexity = 40

# Fit a 3-dimensional t-SNE embedding (perplexity 40) on every column of
# `train` except the first, then write each row's label on an empty canvas,
# coloured through the `colors` lookup.
# NOTE(review): base plot()/text() appear to use only the first two columns
# of a matrix, so the third embedding dimension is likely not drawn - confirm.
tsne <- Rtsne(
  train[, -1],
  dims = 3,
  perplexity = 40,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Perplexity = 50

# Fit a 3-dimensional t-SNE embedding (perplexity 50) on every column of
# `train` except the first, then write each row's label on an empty canvas,
# coloured through the `colors` lookup.
# NOTE(review): base plot()/text() appear to use only the first two columns
# of a matrix, so the third embedding dimension is likely not drawn - confirm.
tsne <- Rtsne(
  train[, -1],
  dims = 3,
  perplexity = 50,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Changing up dimensions

Dimensions = 1, Perplexity = 30

# Fit a 1-dimensional t-SNE embedding (perplexity 30) on every column of
# `train` except the first, then write each row's label on an empty canvas,
# coloured through the `colors` lookup.
# NOTE(review): for a one-column matrix, base plot()/text() presumably place
# the embedding value on the y-axis against the row index - confirm.
tsne <- Rtsne(
  train[, -1],
  dims = 1,
  perplexity = 30,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Dimensions = 2, Perplexity = 30

# Fit a 2-dimensional t-SNE embedding (perplexity 30) on every column of
# `train` except the first, then write each row's label at its embedded
# position on an empty canvas, coloured through the `colors` lookup.
tsne <- Rtsne(
  train[, -1],
  dims = 2,
  perplexity = 30,
  verbose = FALSE,
  max_iter = max_iter
)
plot(tsne$Y, type = "n", main = "tsne")
text(tsne$Y, labels = train$Labels, col = colors[train$Labels])

Visualizations of SNF Clusters

Grace Jacobs

March 19, 2019

Top 2 features by NMI for each data type

Number of iterations = 20000

3 Dimensions

Perplexity = 10

Perplexity = 20

Perplexity = 30

Perplexity = 40

Perplexity = 50

Changing up dimensions

Dimensions = 1, Perplexity = 30

Dimensions = 2, Perplexity = 30

Top 15 features by NMI (cortical thickness, inattention, hyperactivity)

Number of iterations = 20000

3 Dimensions

Perplexity = 10

Perplexity = 20

Perplexity = 30

Perplexity = 40

Perplexity = 50

Changing up dimensions

Dimensions = 1, Perplexity = 30

Dimensions = 2, Perplexity = 30

Diagnosis labels with the top 2 NMI features of each type

Number of iterations = 20000

3 Dimensions

Perplexity = 10

Perplexity = 20

Perplexity = 30

Perplexity = 40

Perplexity = 50

Changing up dimensions

Dimensions = 1, Perplexity = 30

Dimensions = 2, Perplexity = 30

Diagnosis labels with the top 15 NMI features

Number of iterations = 20000

3 Dimensions

Perplexity = 10

Perplexity = 20

Perplexity = 30

Perplexity = 40

Perplexity = 50

Changing up dimensions

Dimensions = 1, Perplexity = 30

Dimensions = 2, Perplexity = 30