Apparently, people who actually know what they're doing get around 94-98% accuracy on this task.
Even allowing that mine is a "pretend" version, that score is too low, so I'll rethink the approach.
# read data
setwd("C:/Users/k-harada/Desktop/kdd/digit")
traindata <- read.csv("ORG/train.csv")
testdata <- read.csv("ORG/test.csv")
train_label <- traindata$label[1:28000]
valid_label <- traindata$label[28001:42000]
# do not resize here
train_mat <- as.matrix(traindata[1:28000, -1])
meanMat <- matrix(0, nrow = 28 * 28, ncol = 49)
for (i in seq(7)) {
    for (j in seq(7)) {
        # each column of meanMat is a mask that averages one 4x4 block of the 28x28 image
        tempmat <- matrix(0, nrow = 28, ncol = 28)
        tempmat[((4 * i - 3):(4 * i)), ((4 * j - 3):(4 * j))] <- 1/16
        meanMat[, 7 * (j - 1) + i] <- as.numeric(tempmat)
    }
}
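Each mask has 16 cells of 1/16, so every column of meanMat should sum to exactly 1. A quick sanity check (my own addition, not in the original run):
range(colSums(meanMat))  # both values should be exactly 1
image(matrix(meanMat[, 25], nrow = 28))  # view one mask: a single 4x4 patch near the center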
# one-hot encoding of the labels: row i gets a 1 in the column of its digit
answer_mat <- matrix(0, nrow = 28000, ncol = 10)
for (i in seq(10)) {
    answer_mat[train_label == (i - 1), i] <- 1
}
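As an aside, the same one-hot matrix can be built in one vectorized line (my own alternative, equivalent to the loop above; answer_mat2 is a hypothetical name):
answer_mat2 <- 1 * outer(train_label, 0:9, "==")  # 28000 x 10, identical to answer_mat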
# add a hidden layer
# learn weights
W1 <- t(meanMat)
intercept1 <- rep(-127, length = 49)
W2 <- matrix(0, nrow = 10, ncol = 49)
intercept2 <- rep(0, length = 10)
# learn rate
eta1 <- 0.001
eta2 <- 0.001
for (i in seq(28000)) {
    # feed forward
    output1 <- 1/(1 + exp(-1 * (W1 %*% train_mat[i, ] + intercept1)))
    output2 <- 1/(1 + exp(-1 * (W2 %*% output1 + intercept2)))
    # back propagation
    W2 <- W2 + eta2 * (answer_mat[i, ] - output2) %*% t(output1)
    intercept2 <- intercept2 + eta2 * (answer_mat[i, ] - output2)
    W1 <- W1 + eta1 * ((output1 * (1 - output1)) * (t(W2) %*% (answer_mat[i, ] - output2))) %*% t(train_mat[i, ])
    intercept1 <- intercept1 + eta1 * (output1 * (1 - output1)) * (t(W2) %*% (answer_mat[i, ] - output2))
}
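For the record, written out, the update rule this loop implements (with \( x \) the input pixel vector, \( h \) = output1, \( \hat{y} \) = output2, \( y \) the one-hot answer row, and \( \odot \) elementwise multiplication) is:
\[ \delta_2 = y - \hat{y}, \qquad W_2 \leftarrow W_2 + \eta_2 \, \delta_2 \, h^\top, \qquad b_2 \leftarrow b_2 + \eta_2 \, \delta_2 \]
\[ \delta_1 = h \odot (1 - h) \odot (W_2^\top \delta_2), \qquad W_1 \leftarrow W_1 + \eta_1 \, \delta_1 \, x^\top, \qquad b_1 \leftarrow b_1 + \eta_1 \, \delta_1 \]
Two things worth noting: \( \delta_2 \) carries no \( \hat{y} \odot (1 - \hat{y}) \) factor, which makes it the gradient of a per-unit cross-entropy loss rather than squared error, and the code updates W2 before computing \( \delta_1 \), so \( \delta_1 \) actually uses the freshly updated weights.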
Computing the Outputs
output_mat1 <- 1/(1 + exp(-1 * (train_mat %*% t(W1) + matrix(1, nrow = 28000, ncol = 1) %*% matrix(intercept1, ncol = 49))))
output_mat2 <- 1/(1 + exp(-1 * (output_mat1 %*% t(W2) + matrix(1, nrow = 28000, ncol = 1) %*% matrix(intercept2, ncol = 10))))
trainres <- max.col(as.matrix(output_mat2)) - 1
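Since this forward pass gets repeated several times below, here is a hypothetical helper that does the same thing (my own refactor, not in the original; sweep() adds the intercept vector to every row, avoiding the matrix(1, ...) trick):
feed_forward <- function(X, W1, b1, W2, b2) {
    h <- 1/(1 + exp(-sweep(X %*% t(W1), 2, b1, "+")))  # hidden layer, one row per sample
    1/(1 + exp(-sweep(h %*% t(W2), 2, b2, "+")))       # output layer
}
# e.g. trainres <- max.col(feed_forward(train_mat, W1, intercept1, W2, intercept2)) - 1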
Confusion Matrix
table(trainres, train_label)
## train_label
## trainres 0 1 2 3 4 5 6 7 8 9
## 0 2345 9 265 232 22 934 130 12 221 32
## 1 4 3022 236 71 68 238 81 86 287 48
## 2 6 17 1179 21 2 37 7 5 23 1
## 3 25 27 362 2294 4 544 5 26 548 27
## 4 9 2 90 13 2186 187 21 48 222 1015
## 5 0 0 1 18 4 190 0 1 24 6
## 6 284 14 582 29 195 130 2535 13 114 30
## 7 11 12 63 130 267 68 0 2706 222 1428
## 8 37 4 43 100 1 179 0 20 1019 40
## 9 0 0 3 1 7 14 0 7 14 138
Classification Accuracy
mean(trainres == train_label)
## [1] 0.6291
Now, time for some reflection.
The problem most likely lies between the input layer and the hidden layer,
so let's take a look there.
intercept1[1:10]
## [1] -127 -127 -127 -127 -127 -127 -127 -127 -127 -127
image(matrix(W1[1, ], nrow = 28))
...it hasn't learned at all...
Long story short, the initial values are bad.
Look closely at the back propagation formula and you'll find the term (output1 * (1 - output1)).
With the current initial values, output1 saturates at \( 1/(1+\exp(127)) \approx 0 \) or \( 1/(1+\exp(-127)) \approx 1 \), so in either case that term is essentially zero. In other words, the weights barely move.
This is a weakness of the sigmoid function (really, of bounded activation functions in general), so it needs some care.
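To see just how dead that gradient is, a quick numerical check (my own illustration, not in the original):
sig <- function(z) 1/(1 + exp(-z))
sig(127) * (1 - sig(127))    # saturated high: effectively 0
sig(-127) * (1 - sig(-127))  # saturated low: effectively 0
sig(0) * (1 - sig(0))        # the maximum possible value, 0.25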
So, let's redo it.
The changes: divide the data by 255 to shrink the input range,
make the learning rate a bit larger, measure accuracy on the validation data as well, and increase the number of loops.
That's quite a lot of changes slipped in at once!
# EDIT read data
setwd("C:/Users/k-harada/Desktop/kdd/digit")
traindata <- read.csv("ORG/train.csv")
testdata <- read.csv("ORG/test.csv")
train_label <- traindata$label[1:28000]
valid_label <- traindata$label[28001:42000]
# do not resize here
train_mat <- as.matrix(traindata[1:28000, -1])/255
valid_mat <- as.matrix(traindata[28001:42000, -1])/255
meanMat <- matrix(0, nrow = 28 * 28, ncol = 49)
for (i in seq(7)) {
    for (j in seq(7)) {
        tempmat <- matrix(0, nrow = 28, ncol = 28)
        tempmat[((4 * i - 3):(4 * i)), ((4 * j - 3):(4 * j))] <- 1/16
        meanMat[, 7 * (j - 1) + i] <- as.numeric(tempmat)
    }
}
answer_mat <- matrix(0, nrow = 28000, ncol = 10)
for (i in seq(10)) {
    answer_mat[train_label == (i - 1), i] <- 1
}
# three layers
# learn weights
W1 <- t(meanMat)
intercept1 <- rep(-0.5, length = 49)
W2 <- matrix(0, nrow = 10, ncol = 49)
intercept2 <- rep(0, length = 10)
# learn rate
eta1 <- 0.01
eta2 <- 0.01
nloop <- 5
# note: despite the names, these vectors record accuracy, not error
train_err <- rep(0, length = nloop)
valid_err <- rep(0, length = nloop)
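One standard SGD refinement that the loop below omits is visiting the samples in a fresh random order on every pass; a minimal sketch of the change (my own suggestion, untested here):
for (loop in seq(nloop)) {
    ord <- sample(28000)  # fresh random visiting order each pass
    for (i in ord) {
        # ... the same feed-forward / back-propagation updates as below ...
    }
}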
for (loop in seq(nloop)) {
    for (i in seq(28000)) {
        # feed forward
        output1 <- 1/(1 + exp(-1 * (W1 %*% train_mat[i, ] + intercept1)))
        output2 <- 1/(1 + exp(-1 * (W2 %*% output1 + intercept2)))
        # back propagation
        W2 <- W2 + eta2 * (answer_mat[i, ] - output2) %*% t(output1)
        intercept2 <- intercept2 + eta2 * (answer_mat[i, ] - output2)
        W1 <- W1 + eta1 * ((output1 * (1 - output1)) * (t(W2) %*% (answer_mat[i, ] - output2))) %*% t(train_mat[i, ])
        intercept1 <- intercept1 + eta1 * (output1 * (1 - output1)) * (t(W2) %*% (answer_mat[i, ] - output2))
    }
    # evaluate on training and validation data after each pass
    output_mat1 <- 1/(1 + exp(-1 * (train_mat %*% t(W1) + matrix(1, nrow = 28000, ncol = 1) %*% matrix(intercept1, ncol = 49))))
    output_mat2 <- 1/(1 + exp(-1 * (output_mat1 %*% t(W2) + matrix(1, nrow = 28000, ncol = 1) %*% matrix(intercept2, ncol = 10))))
    output_mat1_v <- 1/(1 + exp(-1 * (valid_mat %*% t(W1) + matrix(1, nrow = 14000, ncol = 1) %*% matrix(intercept1, ncol = 49))))
    output_mat2_v <- 1/(1 + exp(-1 * (output_mat1_v %*% t(W2) + matrix(1, nrow = 14000, ncol = 1) %*% matrix(intercept2, ncol = 10))))
    trainres <- max.col(as.matrix(output_mat2)) - 1
    validres <- max.col(as.matrix(output_mat2_v)) - 1
    train_err[loop] <- mean(trainres == train_label)
    valid_err[loop] <- mean(validres == valid_label)
    print(paste("LOOP=", loop, "now"))
}
## [1] "LOOP= 1 now"
## [1] "LOOP= 2 now"
## [1] "LOOP= 3 now"
## [1] "LOOP= 4 now"
## [1] "LOOP= 5 now"
table(trainres, train_label)
## train_label
## trainres 0 1 2 3 4 5 6 7 8 9
## 0 2658 0 9 2 3 22 17 9 8 14
## 1 0 3032 12 9 11 5 2 19 16 7
## 2 5 8 2656 38 10 8 2 40 10 1
## 3 5 13 26 2725 1 66 0 21 31 41
## 4 5 2 20 2 2623 12 6 17 3 53
## 5 6 7 1 39 0 2313 15 2 12 11
## 6 15 3 22 8 16 23 2716 2 8 1
## 7 0 9 26 13 5 2 1 2775 4 23
## 8 21 31 49 62 17 54 20 8 2593 35
## 9 6 2 3 11 70 16 0 31 9 2579
table(validres, valid_label)
## valid_label
## validres 0 1 2 3 4 5 6 7 8 9
## 0 1376 0 10 3 2 13 15 6 1 13
## 1 0 1540 0 4 7 4 2 6 19 7
## 2 2 4 1266 23 3 6 5 24 10 2
## 3 3 6 17 1338 1 48 0 3 9 23
## 4 4 2 9 0 1239 12 5 14 3 32
## 5 4 0 4 25 1 1136 7 2 10 6
## 6 8 1 13 5 10 11 1311 0 4 1
## 7 0 2 11 10 0 1 0 1405 4 24
## 8 14 22 21 31 11 31 13 4 1303 21
## 9 0 0 2 3 42 12 0 13 6 1294
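To see which digits remain hardest, per-digit recall can be read off the diagonal of the validation confusion matrix (my own addition, not in the original run):
tab <- table(validres, valid_label)
round(diag(tab) / colSums(tab), 3)  # fraction of each true digit classified correctly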
# plot
plot(train_err, type = "l", col = 1, ylim = c(0.85, 1), xlab = "", ylab = "")
par(new = T)
plot(valid_err, type = "l", col = 2, ylim = c(0.85, 1), xlab = "LOOP", ylab = "Accuracy")
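The same overlay can be drawn in one call with matplot, which avoids the par(new = T) double-plot trick (my own alternative):
matplot(cbind(train_err, valid_err), type = "l", lty = 1, col = 1:2,
        ylim = c(0.85, 1), xlab = "LOOP", ylab = "Accuracy")
legend("bottomright", c("train", "valid"), lty = 1, col = 1:2)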
Incidentally, with more loops the validation accuracy also reached about 96-97%.
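Finally, testdata was read in at the top but never used; for completeness, here is a sketch of scoring it and writing a Kaggle-style submission file (assuming the usual ImageId/Label format of the digit competition, and reusing the hypothetical feed_forward helper from earlier; untested):
test_mat <- as.matrix(testdata)/255
testres <- max.col(feed_forward(test_mat, W1, intercept1, W2, intercept2)) - 1
write.csv(data.frame(ImageId = seq_along(testres), Label = testres),
          "submission.csv", row.names = FALSE)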