1. irisデータを用いて、tsne, prcomp, Rtsneを比較する。
  2. coil20のデータを用いてprcomp, Rtsneを比較する。

1 tsne, prcomp, Rtsneを比較

1.1 tsne

1.2 PCA

# Load the project's helper functions.
# NOTE(review): MYPCAT, used further down in this file, is not defined in the
# visible source and presumably comes from this script -- confirm.
source("~/pub/bin/r/myfunc.R")

# Turn the per-sample colour vector into a named vector keyed by species.
# NOTE(review): `color` is not created anywhere in this chunk; presumably it is
# provided by myfunc.R or an earlier session -- confirm. The commented-out code
# below this chunk shows one way to build it with RColorBrewer.
names(color) <- iris$Species

# PCA with princomp() and with prcomp().
# princomp() is run on the raw measurements with its default settings; only the
# scores on the first two components are kept.
iris.pri <- princomp(iris[, 1:4])$scores[, 1:2]
# prcomp() is run with every variable scaled to unit variance.
iris.prc <- prcomp(iris[, 1:4], scale. = TRUE)

# Draw both projections side by side, then restore the previous layout so the
# 1 x 2 grid does not leak into later base-graphics plots.
old_par <- par(mfrow = c(1, 2))
plot(iris.pri, pch = 16, col = color, main = "iris_princomp")
plot(iris.prc$x[, 1], iris.prc$x[, 2],
     pch = 16, col = color, main = "iris_prcomp")
par(old_par)

# # named vector
# library(RColorBrewer)
# color <- brewer.pal(3, "Set1")[iris$Species];
# names(color) <- iris$Species
# 
# # plot
# res <- MYGGPCAT(dat = iris[,1:4], i = 1, j = 2, scales = T, cols = color, pch=16)

1.3 Rtsne

  • t-SNE の高速版。主成分分析(prcomp)をしてからt-SNEを実行
  • Barnes-Hut-SNE (arXiv:1301.3342)
  • initial_dims : 始めに行うPCAで保持する次元数 def : 50
  • dims : 出力する次元の数 def : 2
  • theta : 0.0に近いほど精度高(実行時間とtrade-off) def : 0.5
  • verbose=TRUE で途中経過出力
# Default arguments of Rtsne():
# Rtsne(X, dims = 2, initial_dims = 50, perplexity = 30,
#   theta = 0.5, check_duplicates = TRUE, pca = TRUE, max_iter = 1000,
#   verbose = FALSE, is_distance = FALSE, ...)

library(Rtsne)
library(ggplot2)
set.seed(1) # fix the RNG state so the embedding is reproducible

# check_duplicates = FALSE skips Rtsne's duplicate-row check; the four iris
# measurement columns contain repeated rows.
iris.rtsne <- Rtsne(as.matrix(iris[, 1:4]),
                    check_duplicates = FALSE, verbose = FALSE)

# Build the plotting data with explicit column names so aes() can refer to
# columns by name instead of indexing `d` positionally. The species column
# keeps the name `iris.Species`, so the legend title is unchanged.
d <- data.frame(
  tsne1 = iris.rtsne$Y[, 1],
  tsne2 = iris.rtsne$Y[, 2],
  iris.Species = iris$Species
)

# Scatter plot of the two t-SNE dimensions, coloured by species.
gg.pca <- ggplot(data = d,
                 aes(x = tsne1, y = tsne2, col = iris.Species)) +
  geom_point() +
  theme_bw() +
  labs(x = "tsne1", y = "tsne2", title = "Rtsne")
print(gg.pca)

2 coil20のデータを用いてprcomp, Rtsneを比較

2.1 coil20データの処理

  • 画像読み込み(20種類 x 72パターン)
  • 全画像イメージを読み込み32 x 32 (pixel)に圧縮
# Directory holding the processed COIL-20 PNG images.
p <- "~/pub/dat/sampledata/coil20/coil-20-proc"
# Anchor and escape the pattern: a bare ".png" is a regular expression in which
# "." matches any character, so it would also match names merely containing
# "<any char>png".
fi <- list.files(p, pattern = "\\.png$", full.names = TRUE)
suppressMessages(suppressWarnings(library(EBImage)))
options(EBImage.display = "raster", stringsAsFactors = FALSE)

# Read every image and shrink it to 32 x 32 ----
# One column per image, one row per pixel (32 * 32 = 1024). The column count
# follows the number of files found instead of a hard-coded 1440.
img.mat <- matrix(NA_real_, nrow = 1024, ncol = length(fi))
for (i in seq_along(fi)) {
  img <- readImage(fi[i])
  img.mat[, i] <- as.vector(imageData(resize(img, 32)))
}

# Column names are the file names without directory and ".png" extension.
v.fi <- sub("\\.png$", "", basename(fi))
coil20 <- data.frame(img.mat)
names(coil20) <- v.fi

# Display one image per object ----
# Files whose name contains the literal text "__1.png" are used as the
# representative image of each object.
simgs <- fi[grep("__1.png", fi, fixed = TRUE)]
imgs <- lapply(simgs, function(f) resize(readImage(f), 32))
img_comb <- EBImage::combine(imgs)

# Tile the representative images four per row and number them 1..20 in red.
display(tile(img_comb, lwd = 5, nx = 4, fg.col = "white"), all = TRUE)
text(x = rep(seq(3,4*37,37), 5),
     y = rep(seq(5,5*32,37), each=4),
     labels = as.character(c(1:20)), col = "red", font=2)

# Colour codes ----
# Twenty distinct colours, one per object.
col20 <- c("#367800","#8f34bb","#93d639","#0149c0",
           "#bcc80a","#c9007e","#01ac82","#ff5fb9",
           "#2e5d1f","#b10063","#94d5a3","#7b3b70",
           "#ffaf3e","#00a5f9","#c25600","#275392",
           "#ffb197","#007c6b","#ffaadf","#94b6eb")
# NOTE(review): this assumes `fi` lists exactly 72 images per object and that
# the images of one object are contiguous in list.files() order; the numbers
# 1..20 then follow that file order, not necessarily the number in the file
# name -- confirm.
cols <- rep(col20, each = 72)

2.2 PCA

# PCA plot of the COIL-20 images using the project helper MYPCAT.
# t(coil20) puts one image per row and one pixel per column.
# NOTE(review): MYPCAT is not defined in the visible source; presumably `i`/`j`
# choose the components to plot and `scaled` toggles scaling -- confirm.
res.pca <- MYPCAT(dat = t(coil20), i = 1, j = 2, scaled = TRUE,
                  cols = cols, pch = 20, main = "PCA_coil20")
# Legend numbering follows the order in which the colours appear in `cols`.
legend("topright", legend = as.character(1:20), pt.cex = 0.7, pch = 20,
       col = unique(cols), ncol = 4, cex = 0.5)

2.3 Rtsne

library(Rtsne)

# Three t-SNE runs on the COIL-20 images (one image per row), all with
# perplexity 18. The seed is reset before every run so that each run starts
# from the same RNG state: differences between the embeddings then come from
# the changed argument, not from a different random initialisation.
set.seed(1)
res1.tsne <- Rtsne(t(coil20), perplexity = 18)
# Lower theta than the default 0.5.
set.seed(1)
res2.tsne <- Rtsne(t(coil20), perplexity = 18, theta = 0.1)
# Same as the first run but without the duplicate-row check.
set.seed(1)
res3.tsne <- Rtsne(t(coil20), perplexity = 18, check_duplicates = FALSE)

# 2 x 2 panel: PCA next to the three t-SNE embeddings. The previous graphics
# layout is restored afterwards.
old_par <- par(mfrow = c(2, 2))
# NOTE(review): `dat` receives the object returned by the earlier MYPCAT call
# rather than t(coil20); whether MYPCAT accepts that input is not visible
# here -- confirm.
MYPCAT(dat = res.pca, i = 1, j = 2, scaled = TRUE, cols = cols, pch = 20, main = "")
## NULL
# (the line above is rendered output, apparently the printed value of the call)
plot(res1.tsne$Y, pch = 16, col = cols, main = "Rtsne_coil20")
legend("topright", legend = as.character(1:20), pt.cex = 0.7, pch = 20,
       col = unique(cols), ncol = 4, cex = 0.5)
plot(res2.tsne$Y, pch = 16, col = cols, main = "Rtsne_coil20")
plot(res3.tsne$Y, pch = 16, col = cols, main = "Rtsne_coil20")
par(old_par)