# =================================================================================================
#
#install.packages("plot.matrix")
library(plot.matrix)
library(data.table)
library(ggplot2)
library(fst)
library(magrittr)
library(kableExtra)
library(leaps)
library(png)
library(knitr)
library(grid)
library(gridExtra)
library(devtools)
#install.packages("tensorflow", type="source")
library(tensorflow)
#install_tensorflow()
#install.packages("keras", type="source")
library(keras)
#install_keras()
library(reticulate)
#
########################################################################
#
# Extract, Transform, Load
# Read data, investigate dimensions, type of variables, values taken
# (distribution or frequency), missing values.
#
########################################################################
#
#
# ==========================================================
data <- "Data"
rawData <- file.path(data, "raw")
rData <- file.path(data, "r")
list.files(rawData)
[1] "KDigits_train_02.csv" "KDigits_train_03.csv" "KDigits_train_04.csv"
[4] "KDigits_train_05.csv" "KDigits_train_06.csv" "KDigits_train_07.csv"
[7] "KDigits_train_08.csv" "KDigits_train_09.csv" "KDigits_train_10.csv"
[10] "KDigits_train_data.csv" "Titanic_test.csv" "Titanic_train.csv"
#
#
# The commands below simply read each .csv file listed above and write it back out
# as .fst; the pattern is identical for every file (a more compact loop version is
# sketched after the reference blocks below).
#
#
# All training data
#
# ========= -- DO NOT USE - only for reference
#data.train <- read.csv(file.path(rawData, "KDigits_train_data.csv"), stringsAsFactors = T)
#write_fst(data.train, file.path(rData, "data.train.fst"))
# =========
# Split training data
#
# ==========
#s01 <- read.csv(file.path(rawData, "KDigits_train_01.csv"), stringsAsFactors = T)
#write_fst(s01, file.path(rData, "s01.fst"))
#s02 <- read.csv(file.path(rawData, "KDigits_train_02.csv"), stringsAsFactors = T)
#write_fst(s02, file.path(rData, "s02.fst"))
#s03 <- read.csv(file.path(rawData, "KDigits_train_03.csv"), stringsAsFactors = T)
#write_fst(s03, file.path(rData, "s03.fst"))
#s04 <- read.csv(file.path(rawData, "KDigits_train_04.csv"), stringsAsFactors = T)
#write_fst(s04, file.path(rData, "s04.fst"))
#s05 <- read.csv(file.path(rawData, "KDigits_train_05.csv"), stringsAsFactors = T)
#write_fst(s05, file.path(rData, "s05.fst"))
#s06 <- read.csv(file.path(rawData, "KDigits_train_06.csv"), stringsAsFactors = T)
#write_fst(s06, file.path(rData, "s06.fst"))
#s07 <- read.csv(file.path(rawData, "KDigits_train_07.csv"), stringsAsFactors = T)
#write_fst(s07, file.path(rData, "s07.fst"))
#s08 <- read.csv(file.path(rawData, "KDigits_train_08.csv"), stringsAsFactors = T)
#write_fst(s08, file.path(rData, "s08.fst"))
#s09 <- read.csv(file.path(rawData, "KDigits_train_09.csv"), stringsAsFactors = T)
#write_fst(s09, file.path(rData, "s09.fst"))
#s10 <- read.csv(file.path(rawData, "KDigits_train_10.csv"), stringsAsFactors = T)
#write_fst(s10, file.path(rData, "s10.fst"))
# ==========
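#
# A more compact alternative to the ten blocks above (a sketch, assuming the file
# naming shown by list.files()) loops over the split numbers:
#
# for(i in 1:10){
#   num <- sprintf("%02d", i)                                          # "01", "02", ..., "10"
#   csv <- file.path(rawData, paste0("KDigits_train_", num, ".csv"))
#   fst <- file.path(rData,   paste0("s", num, ".fst"))
#   write_fst(read.csv(csv, stringsAsFactors = TRUE), fst)
# }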
#
#
# This is the final product of the work above.
#
# ==========================================================
data.train <- read_fst(file.path(rData, "data.train.fst"))
s01 <- read_fst(file.path(rData, "s01.fst"))
s02 <- read_fst(file.path(rData, "s02.fst"))
s03 <- read_fst(file.path(rData, "s03.fst"))
s04 <- read_fst(file.path(rData, "s04.fst"))
s05 <- read_fst(file.path(rData, "s05.fst"))
s06 <- read_fst(file.path(rData, "s06.fst"))
s07 <- read_fst(file.path(rData, "s07.fst"))
s08 <- read_fst(file.path(rData, "s08.fst"))
s09 <- read_fst(file.path(rData, "s09.fst"))
s10 <- read_fst(file.path(rData, "s10.fst"))
#
#
#
sum(is.na(data.train))
[1] 0
#
# There are no missing/NA values present in the data.
typeof(data.train)
[1] "list"
#
# The dataset as a whole is of type 'list'.
typeof(data.train[,3])
[1] "integer"
#
# A single component is of type 'integer'.
type.list <- data.frame(type = character(ncol(data.train)))
for(i in 1:ncol(data.train)){
  type.list[i,1] <- typeof(data.train[,i])
}
length(unique(type.list[,1]))
[1] 1
#
# If any column had a type other than 'integer', the 'type.list'
# column would contain more than one unique value.
#
# Since the length equals 1, every variable in the data is of type 'integer'.
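#
# A one-line check of the same thing (a sketch): tabulate every column's storage type.
# table(sapply(data.train, typeof))   # should show a single entry, "integer"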
dim(data.train)
[1] 41897 786
#
# The full training set consists of 41,897
# observations and 786 variables.
hist(data.train$label, xlab="Label", main = "Label Distribution")
#
# The ten digit labels are roughly uniformly distributed.
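#
# A quick frequency check behind the histogram (a sketch):
# table(data.train$label)                        # exact count per digit 0-9
# round(prop.table(table(data.train$label)), 3)  # proportions; each near 0.1 if roughly uniform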
#
#
# library(vtable)
# st(data.train) -- This is the function that inspired the computations below.
#
#
# This function calculates, for each descriptive statistic listed inside it, the
# absolute difference between a subset of the data and the full training set.
#
# ==========================================================
summary.compare <- function(list1, list2){
  d_mean <- abs(mean(list1) - mean(list2))
  d_st   <- abs(sd(list1) - sd(list2))
  d_med  <- abs(median(list1) - median(list2))
  d_25   <- abs(quantile(list1, 0.25) - quantile(list2, 0.25))
  d_75   <- abs(quantile(list1, 0.75) - quantile(list2, 0.75))
  d_min  <- abs(min(list1) - min(list2))
  d_max  <- abs(max(list1) - max(list2))
  vals <- c(d_mean, d_st, d_med, d_25, d_75, d_min, d_max)
  return(vals)
}
#
#
dim(s01)
[1] 4167 787
#
#
### The following is only to give insight into the current computations.
##
# Each subset is compared, column by column, against the full training data to
# expose any variation. The per-column differences temporarily populate the
# 'diff' matrix built in the loop below.
#
diff <- matrix(0, nrow=7, ncol=ncol(data.train))
for(i in 1:ncol(data.train)){
  diff[1:7, i] <- round(summary.compare(data.train[,i], s01[,i]), digits=3)
}
diff[, 1:14] # NOTE: this is only for subset 's01'
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
[1,] 0.039 0 0 0 0 0 0 0 0 0 0 0 0
[2,] 0.023 0 0 0 0 0 0 0 0 0 0 0 0
[3,] 0.000 0 0 0 0 0 0 0 0 0 0 0 0
[4,] 0.000 0 0 0 0 0 0 0 0 0 0 0 0
[5,] 0.000 0 0 0 0 0 0 0 0 0 0 0 0
[6,] 0.000 0 0 0 0 0 0 0 0 0 0 0 0
[7,] 0.000 0 0 0 0 0 0 0 0 0 0 0 0
[,14]
[1,] 0.003
[2,] 0.567
[3,] 0.000
[4,] 0.000
[5,] 0.000
[6,] 0.000
[7,] 116.000
sum(diff[1,]) # mean
[1] 569.161
sum(diff[2,]) # st dev
[1] 752.783
sum(diff[3,]) # median
[1] 817
# NOTE: these sums are only for subset s01. Most summary statistics deviate very
# little from the training set, although a few columns deviate substantially.
#
# The block below is commented out because it takes some time to run. For every subset,
# it accumulates the summed difference from the full training data for each summary
# statistic, updating the running totals column by column.
#
#
# diff.mat <- matrix(0, nrow=7, ncol=10)
# for(i in 1:ncol(data.train)){     # outer loop: per-column differences for every subset
#   d01 <- summary.compare(data.train[,i], s01[,i])
#   d02 <- summary.compare(data.train[,i], s02[,i])
#   d03 <- summary.compare(data.train[,i], s03[,i])
#   d04 <- summary.compare(data.train[,i], s04[,i])
#   d05 <- summary.compare(data.train[,i], s05[,i])
#   d06 <- summary.compare(data.train[,i], s06[,i])
#   d07 <- summary.compare(data.train[,i], s07[,i])
#   d08 <- summary.compare(data.train[,i], s08[,i])
#   d09 <- summary.compare(data.train[,i], s09[,i])
#   d10 <- summary.compare(data.train[,i], s10[,i])
#   for(r in 1:7){                  # inner loop: accumulate each summary statistic
#     diff.mat[r,1]  <- diff.mat[r,1]  + d01[r]
#     diff.mat[r,2]  <- diff.mat[r,2]  + d02[r]
#     diff.mat[r,3]  <- diff.mat[r,3]  + d03[r]
#     diff.mat[r,4]  <- diff.mat[r,4]  + d04[r]
#     diff.mat[r,5]  <- diff.mat[r,5]  + d05[r]
#     diff.mat[r,6]  <- diff.mat[r,6]  + d06[r]
#     diff.mat[r,7]  <- diff.mat[r,7]  + d07[r]
#     diff.mat[r,8]  <- diff.mat[r,8]  + d08[r]
#     diff.mat[r,9]  <- diff.mat[r,9]  + d09[r]
#     diff.mat[r,10] <- diff.mat[r,10] + d10[r]
#   }
# }
#
# Renaming matrix column and row names for table presentation.
#
#colnames(diff.mat) <- c("s01", "s02", "s03", "s04", "s05", "s06", "s07", "s08", "s09", "s10")
#rownames(diff.mat) <- c("Mean", "Sd", "Median", "25th", "75th", "Min", "Max")
#write.table(diff.mat, "diff.txt")
diff.mat <- read.table("diff.txt")
kbl(diff.mat, caption="Subset Variation from Training Set", booktabs = T, linesep = "") %>%
kable_styling(latex_options = c("striped", "hold_position"), stripe_index = c(1,2, 5:6))
|        | s01       | s02       | s03       | s04       | s05       | s06       | s07      | s08       | s09       | s10       |
|--------|-----------|-----------|-----------|-----------|-----------|-----------|----------|-----------|-----------|-----------|
| Mean   | 569.1622  | 622.5883  | 599.7935  | 497.8024  | 861.6378  | 407.6950  | 622.258  | 735.4372  | 705.6742  | 665.5293  |
| Sd     | 752.7789  | 837.5735  | 616.3175  | 643.8175  | 757.7040  | 617.0945  | 648.084  | 747.1020  | 808.5846  | 750.9476  |
| Median | 817.0000  | 827.0000  | 830.0000  | 673.0000  | 1094.0000 | 588.0000  | 1254.500 | 1129.0000 | 792.0000  | 1014.0000 |
| 25th   | 305.0000  | 272.0000  | 163.0000  | 166.2500  | 178.0000  | 214.5000  | 365.500  | 55.5000   | 79.0000   | 53.0000   |
| 75th   | 1219.5000 | 1278.0000 | 1026.0000 | 917.5000  | 1464.0000 | 856.0000  | 907.000  | 1260.7500 | 1343.0000 | 1137.0000 |
| Min    | 0.0000    | 18.0000   | 8.0000    | 10.0000   | 3.0000    | 2.0000    | 17.000   | 4.0000    | 6.0000    | 34.0000   |
| Max    | 13201.0000 | 14139.0000 | 13331.0000 | 13539.0000 | 11345.0000 | 11670.0000 | 14066.000 | 14029.0000 | 12878.0000 | 12547.0000 |
#
# Checking Frequency and Distribution of Subset Differences
#
row.names <- rownames(diff.mat)
plot.mat <- cbind(diff.mat, row.names)
p.m.see <- melt(plot.mat[1:6,]) # separating high from low frequencies
p.m.up <- melt(plot.mat[7,]) # 'see' is to see difference, 'up' is upper
plot.mat <- melt(plot.mat)
hist(plot.mat$value, xlab="Difference in Sum", main = "Total Subset Differences")
g1 <- ggplot(data=p.m.up, aes(x=variable, y=value)) +
  geom_bar(stat="identity", position=position_dodge()) +
  ggtitle("Mean") + xlab("Subset")
g2 <- ggplot(data=p.m.see, aes(x=variable, y=value, fill=row.names)) +
  geom_bar(stat="identity") +
  facet_wrap(~ row.names) +
  scale_x_discrete(breaks = unique(p.m.see$variable)[seq(2,10,2)]) +
  theme(legend.position = "none",
        axis.text=element_text(size=12)) + xlab("Subset")
grid.arrange(g1, g2)
#
########################################################################
#
# Separate the outcome ("label") variable into a separate dataset (to
# prevent model from 'peeking' into the result).
#
# Make sure model only uses intended predictors of the form "X20x31"
# and similar (you will have to drop some columns like "id", "cut", etc.).
#
########################################################################
#
#
# Columns to drop:
# - label
# - id
# - cut
#
#
# ==========================================================
s01.t <- s01[,2:785]
s02.t <- s02[,2:785]
s03.t <- s03[,2:785]
s04.t <- s04[,2:785]
s05.t <- s05[,2:785]
s06.t <- s06[,2:785]
s07.t <- s07[,2:785]
s08.t <- s08[,2:785]
s09.t <- s09[,2:785]
s10.t <- s10[,2:785]
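#
# Equivalent selection by name (a sketch, assuming the columns are literally named
# "label", "id" and "cut" as listed above):
#
# keep  <- setdiff(names(s01), c("label", "id", "cut"))
# s01.t <- s01[, keep]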
#
#
# Now that each row is 784 pure 8-bit grayscale values, I want to plot a few as images.
# First, I converted one row into an integer vector and transposed it so that it could
# easily be reshaped into a 28 x 28 square matrix. The matrix of numbers itself is interesting.
#
# Next, I saved that matrix as a .csv file and opened it in MATLAB, whose 'mesh()'
# function produced the plots in the following screenshots.
#
one.img <- s02.t[1,]
one.img <- as.vector(t(one.img))
one.img <- matrix(one.img, nrow=28, ncol=28)
#write.csv(one.img, "one.img.csv")
one.img.2 <- s02.t[2,]
one.img.2 <- as.vector(t(one.img.2))
one.img.2 <- matrix(one.img.2, nrow=28, ncol=28)
#write.csv(one.img.2, "one.img.2.csv")
# img1-img4 hold the MATLAB screenshots referenced above, read in separately
# (e.g. with png::readPNG); here they are arranged in a 2 x 2 grid.
grid.arrange(rasterGrob(img1), rasterGrob(img2),
             rasterGrob(img3), rasterGrob(img4),
             ncol=2, nrow=2)
#
# It is also possible to do this in R with the 'plot.matrix' library.
#
brk = 50
plot(one.img.2, border=NA, breaks=brk, col=heat.colors(brk),
key=NULL)
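#
# An R analogue of MATLAB's mesh() (a sketch) is base R's persp(), which draws the
# same 28 x 28 grid as a 3-D surface of pixel intensities:
#
# persp(x = 1:28, y = 1:28, z = one.img.2,
#       theta = 30, phi = 30, expand = 0.6,
#       xlab = "column", ylab = "row", zlab = "intensity")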
#
# ======= FUNCTION 1 ===========================================================
#
# Prepare Test Sets
#
test.prep.x <- function(pure){
  x_test <- as.matrix(pure)
  x_test <- x_test / 255                      # scale 8-bit grayscale to [0, 1]
  return(x_test)
}
test.prep.y <- function(excluded){
  y_test <- to_categorical(excluded, 10)      # one-hot encode the 10 digit classes
  return(y_test)
}
# ======= FUNCTION 2 ===========================================================
#
# Makes Predictions
# Calls my.prediction function to evaluate
#
my.model.preds <- function(x_test, y_test, id){
  predictions <- modelnn %>% predict(x_test) %>% k_argmax()
  # Sending to other function for eval
  final.pred <- my.prediction(predictions, y_test, id)
  return(final.pred)
}
# ======= FUNCTION 3 ===========================================================
#
# Receives Test Set Predictions
# Organizes for Evaluation
#
#
my.prediction <- function(preds, actual, id){
  test.pred <- t(t(as.numeric(preds[])))                     # predictions as a column vector
  test.pred <- cbind(id, test.pred, rep(0, length(actual)))  # col 1: id, col 2: predicted, col 3: actual (filled below)
  count <- 1
  for(i in 1:nrow(actual)){
    for(t in 1:ncol(actual)){
      if((actual[i,t] == 1) && (count < nrow(test.pred))){
        test.pred[count,3] <- t
        count <- count + 1
        break
      }else if((actual[i,t] == 1) && (count == nrow(test.pred))){
        test.pred[count,3] <- t
        break
      }
    }
  }
  test.pred[,3] <- test.pred[,3] - 1         # shift one-hot column index to digit 0-9
  return(test.pred)
}
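#
# The nested loops above recover the label from the one-hot matrix; an equivalent
# vectorized decode (a sketch) uses max.col(), the row-wise position of the 1:
#
# decode.one.hot <- function(onehot) max.col(onehot) - 1   # shift column index to digit 0-9
# # e.g. cbind(id, as.numeric(preds), decode.one.hot(actual)) rebuilds the same three columns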
#
# ========================================================== Training Evaluation Set
exclude <- rbind(s01[,c(1,786,787)], s02[,c(1,786,787)], s03[,c(1,786,787)],
s05[,c(1,786,787)], s06[,c(1,786,787)], s08[,c(1,786,787)], s10[,c(1,786,787)])
exclude.2 <- rbind(s09[,c(1,786,787)])
exclude.3 <- rbind(s07[,c(1,786,787)])
exclude.4 <- rbind(s04[,c(1,786,787)])
# ========================================================== Training Set
x_train <- rbind(s01.t, s02.t, s03.t, s05.t, s06.t, s08.t, s10.t)
x_train <- x_train / 255
g_train <- exclude[,1]
x_train <- as.matrix(x_train)
y_train <- to_categorical(g_train, 10)
# ========================================================== Multiple Test Sets
x_test1 <- test.prep.x(s04.t)
y_test1 <- test.prep.y(exclude.4[,1])
x_test2 <- test.prep.x(s07.t)
y_test2 <- test.prep.y(exclude.3[,1])
x_test3 <- test.prep.x(s09.t)
y_test3 <- test.prep.y(exclude.2[,1])
dim(x_train)
[1] 29224 784
dim(x_test1)
[1] 4254 784
dim(x_test2)
[1] 4230 784
dim(x_test3)
[1] 4189 784
#
########################################################################
#
# Model and parameter choices.
#
# Include comments showing you are aware of which parameters you set,
# or may be implicit (default) in the model.
#
########################################################################
#
#
#
#
# Multilayer Neural Network
#
#
modelnn <- keras_model_sequential()
modelnn %>%                                            # first layer: 28x28 = 784 input units
  layer_dense(units = 256, activation = "relu",        # -> hidden layer of 256 units
              input_shape = c(784)) %>%                # ^ layer_dense takes the modelnn object as input and returns it
  layer_dropout(rate = 0.4) %>%                        # ^ object piped through for dropout regularization
  layer_dense(units = 128, activation = "relu") %>%    # second hidden layer
  layer_dropout(rate = 0.3) %>%                        # dropout layer
  layer_dense(units = 10, activation = "softmax")      # maps the second hidden layer to class probabilities
# This model has four layers in total:
# (1) Input layer
# (2) Hidden layer 1
# (3) Hidden layer 2
# (4) Output layer
#
#
# - The input layer and the weights into the first hidden layer have 785 x 256 = 200,960
#   elements (784 inputs plus one bias per unit, times 256 units).
# - Each hidden unit feeds into the second hidden layer through a weight matrix of
#   dimension 257 x 128 = 32,896.
# - Finally, the output layer has 10 responses; its weights number 129 x 10 = 1,290.
#
#
summary(modelnn)
Model: "sequential"
________________________________________________________________________________
Layer (type) Output Shape Param #
================================================================================
dense_2 (Dense) (None, 256) 200960
dropout_1 (Dropout) (None, 256) 0
dense_1 (Dense) (None, 128) 32896
dropout (Dropout) (None, 128) 0
dense (Dense) (None, 10) 1290
================================================================================
Total params: 235,146
Trainable params: 235,146
Non-trainable params: 0
________________________________________________________________________________
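# The 235,146 total can be reproduced by hand: each dense layer has (inputs + 1 bias)
# weights per unit.
#
# (784 + 1) * 256                            # 200,960 parameters into hidden layer 1
# (256 + 1) * 128                            #  32,896 parameters into hidden layer 2
# (128 + 1) * 10                             #   1,290 parameters into the output layer
# (784+1)*256 + (256+1)*128 + (128+1)*10     # 235,146 in total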
# Fit the model by minimizing the cross-entropy loss.
#
# Since the response is qualitative, we look for coefficient estimates that minimize the
# negative multinomial log-likelihood, i.e. the cross-entropy.
#
# If the response were quantitative, we would instead minimize squared-error loss.
#
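# For a single image with one-hot label y and softmax output p, the cross-entropy is
# -sum(y * log(p)), i.e. minus the log-probability assigned to the true class.
# A toy check (a sketch with made-up probabilities):
#
# y <- c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0)                        # true class is "3"
# p <- c(.01, .01, .01, .91, .01, .01, .01, .01, .01, .01)    # softmax output, sums to 1
# -sum(y * log(p))                                            # = -log(0.91) ~ 0.094
#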
modelnn %>% compile(
  loss = "categorical_crossentropy",   # set explicitly; matches the one-hot (categorical) labels
  optimizer = optimizer_rmsprop(),     # optimizer parameters left at their implicit defaults
  metrics = c("accuracy")
)
#
########################################################################
#
# Test the performance of your model(s) on a different split of data
# (this means that you should NOT have used all the splits of the data
# in your training model).
#
########################################################################
#
#
#
# Final Step
# Supply training data and fit model.
system.time(
  history <- modelnn %>%
    fit(x_train, y_train, epochs = 30, batch_size = 128,
        validation_split = 0.2)
)
user system elapsed
66.17 1.83 18.86
plot(history, smooth = FALSE)
# This model starts overfitting at around epoch 7. One way to prevent that is to stop
# training near that point (early stopping), as sketched below.
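# A sketch of that idea using keras' early-stopping callback (the patience value here
# is illustrative, not tuned):
#
# history <- modelnn %>%
#   fit(x_train, y_train, epochs = 30, batch_size = 128,
#       validation_split = 0.2,
#       callbacks = list(callback_early_stopping(monitor = "val_loss", patience = 3,
#                                                restore_best_weights = TRUE)))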
# Test Error
modelnn %>% evaluate(x_test1, y_test1)
loss accuracy
0.1117804 0.9774330
#
### Making predictions
##
#
#
# predicting the softmax probabilities
#
# This is a probability assigned to each of the 10 classes. The classifier then
# assigns the image to the class with the highest probability.
#
predictions <- modelnn %>% predict(x_test1)
round(head(predictions), 3)
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] 0 0 0 0 0.000 1 0 0.000 0 0.000
[2,] 0 0 0 1 0.000 0 0 0.000 0 0.000
[3,] 0 0 0 0 0.000 0 0 0.000 1 0.000
[4,] 0 0 0 0 0.000 0 0 0.001 0 0.999
[5,] 0 0 0 0 0.037 0 0 0.001 0 0.962
[6,] 1 0 0 0 0.000 0 0 0.000 0 0.000
#
#
# Directly predicting label
label_pred <- modelnn %>% predict(x_test1) %>% k_argmax()
label_pred[1:20]
tf.Tensor([5 3 8 9 9 0 9 4 1 2 7 4 5 9 7 9 5 4 0 3], shape=(20,), dtype=int64)
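#
# The same labels can be recovered without TensorFlow ops (a sketch): max.col() gives
# the most probable class per row of the softmax matrix.
# label_pred_base <- max.col(predictions) - 1   # digits 0-9; should match label_pred above
# head(label_pred_base, 20)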
#
#
#
#
# Subset 04
# (test eval #1)
#
final.preds1 <- my.model.preds(x_test1, y_test1, exclude.4$id)
tab1 <- table(final.preds1[,2], final.preds1[,3])
tab1
0 1 2 3 4 5 6 7 8 9
0 439 0 3 0 0 0 1 0 1 0
1 0 469 1 1 3 0 0 0 3 0
2 0 1 428 5 1 1 0 2 2 0
3 0 0 2 425 0 4 0 1 1 1
4 0 0 0 1 432 0 2 0 0 10
5 2 0 0 1 0 362 1 0 2 2
6 2 1 1 0 2 0 394 0 1 0
7 0 3 2 2 3 1 0 402 0 6
8 0 0 0 1 0 0 0 0 381 1
9 2 0 0 0 8 2 0 0 3 426
sum(diag(tab1))/sum(tab1)
[1] 0.977433
#
#
# Subset 07 (held out from training)
# (test eval #2)
#
final.preds2 <- my.model.preds(x_test2, y_test2, exclude.3$id)
tab2 <- table(final.preds2[,2], final.preds2[,3])
tab2
0 1 2 3 4 5 6 7 8 9
0 407 0 2 0 1 0 2 0 1 0
1 0 465 0 3 2 0 0 1 4 1
2 1 1 407 2 2 0 0 1 1 1
3 0 0 2 424 0 9 0 1 3 0
4 0 0 0 0 396 0 2 0 1 4
5 0 0 0 2 2 380 4 0 2 1
6 0 0 0 0 1 3 401 0 1 1
7 0 2 1 1 2 1 0 449 0 4
8 0 1 1 4 0 1 0 0 399 3
9 0 1 1 0 5 0 0 4 4 401
sum(diag(tab2))/sum(tab2)
[1] 0.9761229
#
#
# Subset 09
# (test eval #3)
#
final.preds3 <- my.model.preds(x_test3, y_test3, exclude.2$id)
tab3 <- table(final.preds3[,2], final.preds3[,3])
tab3
0 1 2 3 4 5 6 7 8 9
0 405 0 0 1 0 0 0 1 2 0
1 0 483 1 0 0 0 1 2 4 2
2 1 0 381 4 2 0 0 3 1 0
3 0 0 1 422 0 6 0 0 7 2
4 0 2 0 0 410 0 1 1 0 2
5 0 0 0 4 0 381 1 0 5 4
6 2 1 0 1 0 2 400 0 1 0
7 0 2 2 3 1 1 0 446 1 8
8 1 1 2 3 1 1 1 0 392 3
9 0 0 0 2 4 1 0 0 3 360
sum(diag(tab3))/sum(tab3)
[1] 0.9739795
#
########################################################################
#
# Identify examples (data inputs) that were mispredicted, and plot them.
# hint: create a function from the sample code included in RCode/plot_kdigits.R
#
########################################################################
#
#
#
# isolate incorrect values
#
my.wrong.finder <- function(examine){
  # one row per misprediction: id, predicted label, actual label, |predicted - actual|
  wrong_id <- matrix(0, nrow=abs(sum(examine[,2] == examine[,3]) - nrow(examine)), ncol=4)
  count <- 1
  for(i in 1:nrow(examine)){
    if(examine[i,2] != examine[i,3]){
      wrong_id[count,1] <- examine[i,1]
      wrong_id[count,2] <- examine[i,2]
      wrong_id[count,3] <- examine[i,3]
      wrong_id[count,4] <- abs(examine[i,2] - examine[i,3])
      count <- count + 1
    }
  }
  return(wrong_id)
}
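#
# The same rows can be pulled out without a loop (a sketch):
#
# wrong.vec <- final.preds1[final.preds1[,2] != final.preds1[,3], ]     # mispredicted rows
# wrong.vec <- cbind(wrong.vec, abs(wrong.vec[,2] - wrong.vec[,3]))     # add |predicted - actual|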
#
# image prep
#
my.pixel.printer <- function(img.loc){
  img.loc <- as.vector(t(img.loc))               # flatten the row of pixel values
  img.loc <- matrix(img.loc, nrow=28, ncol=28)   # reshape into a 28 x 28 image
  # brk <- 50
  # p <- plot(img.loc, border=NA, breaks=brk, col=heat.colors(brk), key=NULL)
  return(img.loc)
}
#
wrong.1 <- my.wrong.finder(final.preds1)
sum(wrong.1[,4])
[1] 354
nrow(wrong.1)
[1] 96
#
#
wrong.2 <- my.wrong.finder(final.preds2)
sum(wrong.2[,4])
[1] 334
nrow(wrong.2)
[1] 101
#
#
wrong.3 <- my.wrong.finder(final.preds3)
sum(wrong.3[,4])
[1] 413
nrow(wrong.3)
[1] 109
#
brk = 50
img1 <- my.pixel.printer(data.train[wrong.1[3,1]+1,])
img2 <- my.pixel.printer(data.train[wrong.1[45,1]+1,])
img3 <- my.pixel.printer(data.train[wrong.1[69,1]+1,])
img4 <- my.pixel.printer(data.train[wrong.1[74,1]+1,])
wrong.1[3,2] # Predicted Value
[1] 7
wrong.1[3,3] # Actual Value
[1] 9
plot(img1, border=NA, breaks=brk, col=heat.colors(brk),
key=NULL)
wrong.1[45,2] # Predicted Value
[1] 8
wrong.1[45,3] # Actual Value
[1] 9
plot(img2, border=NA, breaks=brk, col=heat.colors(brk),
key=NULL)
wrong.1[69,2] # Predicted Value
[1] 7
wrong.1[69,3] # Actual Value
[1] 2
plot(img3, border=NA, breaks=brk, col=heat.colors(brk),
key=NULL)
wrong.1[74,2] # Predicted Value
[1] 8
wrong.1[74,3] # Actual Value
[1] 3
plot(img4, border=NA, breaks=brk, col=heat.colors(brk),
key=NULL)
# Incorrect predictions occur when the written digit is almost illegible. Given how
# ambiguous many of the misclassified digits look, the model can hardly be blamed
# for some of its errors.