# Clear every object from the workspace, then run the garbage collector
# (its memory report is printed to the console).
rm(list = ls())
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 779609 41.7 1487968 79.5 1206640 64.5
Vcells 1493337 11.4 8388608 64.0 3051988 23.3
library(EBImage)
library(mxnet)
# Side length (in pixels) that every image is resized to.
K <- 36
# Pixel values and labels are read from header-less CSV files.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned, which would silently change how the files are parsed.
X <- read.csv("data/olivetti_X.csv", header = FALSE)
labels <- read.csv("data/olivetti_y.csv", header = FALSE)
# Container for the resized images (filled by the main loop).
rs_df <- data.frame()
# Inspect the dimensions of the pixel data.
dim(X)
[1] 400 4096
# Inspect the dimensions of the label data (compare the row count with X).
dim(labels)
[1] 400 1
# Main loop: resize every image to K x K pixels and build rs_df with one row
# per image (label first, then the resized pixels flattened row by row).
#
# Rows are collected in a preallocated list and bound together once at the
# end; calling rbind() on a growing data frame inside the loop copies the
# whole frame on every iteration.
rows <- vector("list", nrow(X))
# seq_len() gives an empty sequence when X has no rows (1:0 would iterate).
for (i in seq_len(nrow(X))) {
  tryCatch({
    # Image (as 1d vector)
    img <- as.numeric(X[i, ])
    # Reshape as a 64x64 greyscale image (EBImage object)
    img <- Image(img, dim = c(64, 64), colormode = "Grayscale")
    # Resize image to KxK pixels
    img_resized <- resize(img, w = K, h = K)
    # Pixel array of the resized image, via the EBImage accessor
    img_matrix <- imageData(img_resized)
    # Coerce to a vector (transpose first so pixels are stored row by row)
    img_vector <- as.vector(t(img_matrix))
    # Add label in front of the pixels
    label <- labels[i, ]
    rows[[i]] <- c(label, img_vector)
    # Print status
    print(paste("Done", i, sep = " "))
  },
  # Best effort: report the error and leave this image's slot empty (NULL),
  # so the image is skipped just as it was when rows were appended one by one.
  error = function(e) {
    print(e)
  })
}
# Drop skipped images, then bind all rows in a single step.
rows <- Filter(Negate(is.null), rows)
rs_df <- as.data.frame(do.call(rbind, rows))
names(rs_df) <- c("label", paste("pixel", seq_len(K^2)))
output omitted.
# Draw the first ten original 64x64 images in a 2 x 5 grid with thin margins.
par(mfrow = c(2, 5), mar = c(1, 1, 1, 1))
for (i in seq_len(10)) {
  img <- Image(as.numeric(X[i, ]), dim = c(64, 64), colormode = "Grayscale")
  # Reverse the column order of the pixel matrix before plotting
  # (presumably so image() shows the face upright -- confirm visually).
  m <- img@.Data[, rev(seq_len(ncol(img@.Data)))]
  image(m, axes = FALSE, col = grey(seq(0, 1, length.out = 256)))
}
# Draw the first ten resized K x K images in a 2 x 5 grid with thin margins.
par(mfrow = c(2, 5), mar = c(1, 1, 1, 1))
for (i in seq_len(10)) {
  # Rebuild the K x K pixel matrix from the flattened row (column 1 is the
  # label). The pixels were stored row by row, hence byrow = TRUE; TRUE is
  # spelled out because `T` is a plain variable that can be reassigned.
  m <- matrix(as.numeric(rs_df[i, -1]), K, K, byrow = TRUE)
  # Reverse each row of the matrix before handing it to image().
  m <- t(apply(m, 1, rev))
  image(m, axes = FALSE, col = grey(seq(0, 1, length.out = 256)))
}
# Shuffle the images reproducibly and hold out one tenth of them for testing.
# Sizes are derived from rs_df instead of hard-coding 400/360/40, so a
# shorter rs_df (e.g. after a skipped image) does not produce NA rows.
# With 400 images this is the same 360/40 split and the same permutation.
set.seed(100)
n_images <- nrow(rs_df)
n_test <- n_images %/% 10
n_train <- n_images - n_test
shuffled <- rs_df[sample.int(n_images), ]
train <- shuffled[seq_len(n_train), ]
test <- shuffled[n_train + seq_len(n_test), ]
library(ggplot2)
# Histogram of the labels in the train and test partitions.
# The labels are concatenated in the same order as the dataset tags (train
# first, then test); concatenating them test-first would attribute the test
# labels to the "train" group and vice versa.
split_df <- data.frame(
  dataset = c(rep("train", nrow(train)), rep("test", nrow(test))),
  label = c(train$label, test$label)
)
# The data frame is passed directly to ggplot() rather than piped with %>%,
# which no library() call visible in this script attaches.
ggplot(split_df, aes(label, fill = dataset)) +
  geom_histogram(col = "white", binwidth = 1, alpha = 0.55) +
  scale_y_continuous(breaks = seq(0, 10, 2)) +
  ggtitle(
    "400 Face Pictures",
    "40 Faces, Each 10 Pictures, 10% Testing Data"
  )
# Training set: labels come from column 1; the remaining columns are
# transposed so that each column of train_x holds one image, then laid out
# as a K x K x 1 x n array.
train <- data.matrix(train)
train_y <- train[, 1]
train_x <- t(train[, -1])
train_array <- array(train_x, dim = c(K, K, 1, ncol(train_x)))

# Test set: same layout as the training set.
test_y <- test[, 1]
test_x <- t(test[, -1])
test_array <- array(test_x, dim = c(K, K, 1, ncol(test_x)))
# Symbolic network definition: two convolution/tanh/max-pool stages followed
# by two fully connected layers and a softmax output.
data <- mx.symbol.Variable("data")

# Stage 1: 5x5 convolution with 20 filters, tanh, 2x2 max pooling (stride 2)
conv_1 <- mx.symbol.Convolution(
  data = data,
  kernel = c(5, 5),
  num_filter = 20
)
tanh_1 <- mx.symbol.Activation(data = conv_1, act_type = "tanh")
pool_1 <- mx.symbol.Pooling(
  data = tanh_1,
  pool_type = "max",
  kernel = c(2, 2),
  stride = c(2, 2)
)

# Stage 2: 5x5 convolution with 50 filters, tanh, 2x2 max pooling (stride 2)
conv_2 <- mx.symbol.Convolution(
  data = pool_1,
  kernel = c(5, 5),
  num_filter = 50
)
tanh_2 <- mx.symbol.Activation(data = conv_2, act_type = "tanh")
pool_2 <- mx.symbol.Pooling(
  data = tanh_2,
  pool_type = "max",
  kernel = c(2, 2),
  stride = c(2, 2)
)

# Hidden fully connected layer: flatten, 200 units, tanh
flatten <- mx.symbol.Flatten(data = pool_2)
fc_1 <- mx.symbol.FullyConnected(data = flatten, num_hidden = 200)
tanh_3 <- mx.symbol.Activation(data = fc_1, act_type = "tanh")

# Output fully connected layer with 40 units
fc_2 <- mx.symbol.FullyConnected(data = tanh_3, num_hidden = 40)

# Softmax on top, so the network outputs class probabilities
NN_model <- mx.symbol.SoftmaxOutput(data = fc_2)
# Fix mxnet's random seed and run on the CPU.
mx.set.seed(100)
devices <- mx.cpu()

# Fit the network on the training array.
# Alternative settings noted alongside the values: 300 rounds, batch size 40.
model <- mx.model.FeedForward.create(
  NN_model,
  X                  = train_array,
  y                  = train_y,
  ctx                = devices,
  num.round          = 150,
  array.batch.size   = 20,
  learning.rate      = 0.01,
  momentum           = 0.9,
  eval.metric        = mx.metric.accuracy,
  epoch.end.callback = mx.callback.log.train.metric(100)
)
Start training with 1 devices
[1] Train-accuracy=0.0222222225533591
[2] Train-accuracy=0.0277777783986595
[3] Train-accuracy=0.0277777783986595
[4] Train-accuracy=0.0277777783986595
[5] Train-accuracy=0.0277777783986595
[6] Train-accuracy=0.0277777783986595
[7] Train-accuracy=0.0277777783986595
[8] Train-accuracy=0.0277777783986595
[9] Train-accuracy=0.0277777783986595
[10] Train-accuracy=0.0277777783986595
[11] Train-accuracy=0.0277777783986595
[12] Train-accuracy=0.0277777783986595
[13] Train-accuracy=0.0277777783986595
[14] Train-accuracy=0.0277777783986595
[15] Train-accuracy=0.0277777783986595
[16] Train-accuracy=0.0277777783986595
[17] Train-accuracy=0.0277777783986595
[18] Train-accuracy=0.0277777783986595
[19] Train-accuracy=0.0277777783986595
[20] Train-accuracy=0.0277777783986595
[21] Train-accuracy=0.0277777783986595
[22] Train-accuracy=0.0277777783986595
[23] Train-accuracy=0.0138888890958495
[24] Train-accuracy=0.0111111112766796
[25] Train-accuracy=0.0111111112766796
[26] Train-accuracy=0.0111111112766796
[27] Train-accuracy=0.0111111112766796
[28] Train-accuracy=0.0111111112766796
[29] Train-accuracy=0.0111111112766796
[30] Train-accuracy=0.0111111112766796
[31] Train-accuracy=0.0111111112766796
[32] Train-accuracy=0.0111111112766796
[33] Train-accuracy=0.0111111112766796
[34] Train-accuracy=0.0111111112766796
[35] Train-accuracy=0.0111111112766796
[36] Train-accuracy=0.0111111112766796
[37] Train-accuracy=0.0111111112766796
[38] Train-accuracy=0.0111111112766796
[39] Train-accuracy=0.0111111112766796
[40] Train-accuracy=0.00555555563833978
[41] Train-accuracy=0.00555555563833978
[42] Train-accuracy=0.00555555563833978
[43] Train-accuracy=0.00555555563833978
[44] Train-accuracy=0.00555555563833978
[45] Train-accuracy=0.00555555563833978
[46] Train-accuracy=0.00555555563833978
[47] Train-accuracy=0.00555555563833978
[48] Train-accuracy=0.00555555563833978
[49] Train-accuracy=0.00555555563833978
[50] Train-accuracy=0.00833333345750968
[51] Train-accuracy=0.0111111112766796
[52] Train-accuracy=0.00833333345750968
[53] Train-accuracy=0.00833333345750968
[54] Train-accuracy=0.00833333345750968
[55] Train-accuracy=0.00833333345750968
[56] Train-accuracy=0.00833333345750968
[57] Train-accuracy=0.00833333345750968
[58] Train-accuracy=0.00833333345750968
[59] Train-accuracy=0.00833333345750968
[60] Train-accuracy=0.00833333345750968
[61] Train-accuracy=0.00833333345750968
[62] Train-accuracy=0.00833333345750968
[63] Train-accuracy=0.00833333345750968
[64] Train-accuracy=0.00833333345750968
[65] Train-accuracy=0.00833333345750968
[66] Train-accuracy=0.00833333345750968
[67] Train-accuracy=0.00833333345750968
[68] Train-accuracy=0.00833333345750968
[69] Train-accuracy=0.00833333345750968
[70] Train-accuracy=0.00833333345750968
[71] Train-accuracy=0.00833333345750968
[72] Train-accuracy=0.00833333345750968
[73] Train-accuracy=0.00833333345750968
[74] Train-accuracy=0.00833333345750968
[75] Train-accuracy=0.00555555563833978
[76] Train-accuracy=0.00555555563833978
[77] Train-accuracy=0.00555555563833978
[78] Train-accuracy=0.00555555563833978
[79] Train-accuracy=0.00555555563833978
[80] Train-accuracy=0.00555555563833978
[81] Train-accuracy=0.00555555563833978
[82] Train-accuracy=0.00555555563833978
[83] Train-accuracy=0.00833333345750968
[84] Train-accuracy=0.0111111112766796
[85] Train-accuracy=0.0111111112766796
[86] Train-accuracy=0.0111111112766796
[87] Train-accuracy=0.0111111112766796
[88] Train-accuracy=0.0111111112766796
[89] Train-accuracy=0.00833333345750968
[90] Train-accuracy=0.00833333345750968
[91] Train-accuracy=0.0138888890958495
[92] Train-accuracy=0.0138888890958495
[93] Train-accuracy=0.0138888890958495
[94] Train-accuracy=0.0166666669150194
[95] Train-accuracy=0.0194444447341892
[96] Train-accuracy=0.0222222225533591
[97] Train-accuracy=0.0305555560108688
[98] Train-accuracy=0.0444444451067183
[99] Train-accuracy=0.0750000017384688
[100] Train-accuracy=0.0888888906273577
[101] Train-accuracy=0.0694444463070896
[102] Train-accuracy=0.0777777791437176
[103] Train-accuracy=0.069444446100129
[104] Train-accuracy=0.0611111124356588
[105] Train-accuracy=0.138888891165455
[106] Train-accuracy=0.191666669315762
[107] Train-accuracy=0.261111112518443
[108] Train-accuracy=0.358333335154586
[109] Train-accuracy=0.419444448418087
[110] Train-accuracy=0.455555558204651
[111] Train-accuracy=0.505555555224419
[112] Train-accuracy=0.563888885908657
[113] Train-accuracy=0.594444440470801
[114] Train-accuracy=0.636111108793153
[115] Train-accuracy=0.680555552244186
[116] Train-accuracy=0.713888890213437
[117] Train-accuracy=0.719444450404909
[118] Train-accuracy=0.702777779764599
[119] Train-accuracy=0.74722221493721
[120] Train-accuracy=0.8166666759385
[121] Train-accuracy=0.869444446431266
[122] Train-accuracy=0.886111113760206
[123] Train-accuracy=0.88888888557752
[124] Train-accuracy=0.861111104488373
[125] Train-accuracy=0.738888889551163
[126] Train-accuracy=0.822222216261758
[127] Train-accuracy=0.941666662693024
[128] Train-accuracy=0.969444440470801
[129] Train-accuracy=0.997222221559948
[130] Train-accuracy=0.997222221559948
[131] Train-accuracy=0.997222221559948
[132] Train-accuracy=0.997222221559948
[133] Train-accuracy=0.997222221559948
[134] Train-accuracy=0.997222221559948
[135] Train-accuracy=0.997222221559948
[136] Train-accuracy=0.997222221559948
[137] Train-accuracy=1
[138] Train-accuracy=1
[139] Train-accuracy=1
[140] Train-accuracy=1
[141] Train-accuracy=1
[142] Train-accuracy=0.997222221559948
[143] Train-accuracy=0.997222221559948
[144] Train-accuracy=1
[145] Train-accuracy=1
[146] Train-accuracy=1
[147] Train-accuracy=1
[148] Train-accuracy=1
[149] Train-accuracy=1
[150] Train-accuracy=1
# Predict labels
predicted <- predict(model, test_array)
# Assign labels: take the highest-scoring row index for every column of
# `predicted` (hence the transpose) and subtract 1
# (presumably because the labels are 0-based -- confirm against the label file).
predicted_labels <- max.col(t(predicted)) - 1
# Get accuracy as the share of test images whose predicted label matches.
# Comparing element-wise avoids two problems with sum(diag(table(...))) / 40:
# the diagonal of the table only counts matches when both vectors contain
# exactly the same set of labels, and the test-set size was hard-coded.
mean(predicted_labels == test[, 1])
[1] 0.975