MNIST (“Modified National Institute of Standards and Technology”) is the de facto “hello world” dataset of computer vision. Since its release in 1999, this classic dataset of handwritten images has served as the basis for benchmarking classification algorithms. As new machine learning techniques emerge, MNIST remains a reliable resource for researchers and learners alike.
# Work from the folder that holds the Kaggle train.csv / test.csv files.
# NOTE(review): hard-coded, machine-specific path.
setwd("D:/mnist")
rm(list=ls()) #clear every object from the global workspace before starting
# memory.size(max=TRUE) only REPORTS the peak memory R has obtained from the
# OS (in MB); it does not raise any limit (that would be memory.limit()).
# NOTE(review): Windows-only, and reportedly a stub in recent R releases - confirm.
memory.size(max=TRUE)
## [1] 32.19
# Load the labelled training set. The workspace was already cleared above, so
# the second rm(list = ls()) that used to sit here was redundant and is dropped.
# FALSE is spelled out because the shorthand `F` is an ordinary variable that
# can be reassigned.
train <- read.csv("train.csv", stringsAsFactors = FALSE)
# Quick structural check: expect one `label` column followed by the pixel columns.
str(train)
## 'data.frame': 42000 obs. of 785 variables:
## $ label : int 1 0 1 4 0 0 7 3 5 3 ...
## $ pixel0 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel1 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel2 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel3 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel4 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel5 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel6 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel7 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel8 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel9 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel10 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel11 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel12 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel13 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel14 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel15 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel16 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel17 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel18 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel19 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel20 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel21 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel22 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel23 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel24 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel25 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel26 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel27 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel28 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel29 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel30 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel31 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel32 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel33 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel34 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel35 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel36 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel37 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel38 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel39 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel40 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel41 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel42 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel43 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel44 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel45 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel46 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel47 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel48 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel49 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel50 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel51 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel52 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel53 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel54 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel55 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel56 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel57 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel58 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel59 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel60 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel61 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel62 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel63 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel64 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel65 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel66 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel67 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel68 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel69 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel70 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel71 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel72 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel73 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel74 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel75 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel76 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel77 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel78 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel79 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel80 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel81 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel82 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel83 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel84 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel85 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel86 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel87 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel88 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel89 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel90 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel91 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel92 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel93 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel94 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel95 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel96 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pixel97 : int 0 0 0 0 0 0 0 0 0 0 ...
## [list output truncated]
# Separate the target from the predictors: keep the first column (the digit
# label) in its own vector and leave only the remaining columns in `train`.
mylabel <- train[[1]]
train <- train[-1]
# Load the test set from the working directory.
test <- read.csv(file = "test.csv", stringsAsFactors = FALSE)
library(EBImage) #image processing library
# Reshape the flat pixel columns into an (image, 28, 28) array for plotting.
# The image count and pixel count are taken from the data instead of being
# hard-coded (previously 42000 and 784).
stopifnot(ncol(train) == 28 * 28) # one column per pixel of a 28 x 28 image
myorder <- rev(seq_len(ncol(train))) # optional: reversed column order
newtrain <- train[, myorder] # optional
# unlist() walks the data frame column by column; use.names = FALSE avoids
# building a name for every one of the nrow * 784 elements only to drop it.
myviz <- unlist(newtrain, use.names = FALSE)
# First index = image (row of newtrain); the other two index the pixel columns.
im <- array(myviz, dim = c(nrow(newtrain), 28, 28))
# Optional: EBImage::flip() so the digits display the right way up.
# NOTE(review): presumably mirrors along the second array dimension - confirm.
newim <- flip(im)
# Page through the first 100 digits on a 3 x 3 grid to confirm they render.
# ask = TRUE makes the device wait for ENTER before starting each new page.
par(mfrow = c(3, 3), ask = TRUE)
for (i in seq_len(100)) {
  # Draw slice i of newim on a 256-step gray scale.
  image(
    x = 1:28,
    y = 1:28,
    z = newim[i, , ],
    col = gray(0:255 / 255)
  )
}
### Outlier analysis. Open question: should I oversample the 1’s and 7’s?
#Don't forget to analyze outliers before you do anything!!
#x outliers
# Training pixels as a plain matrix (rows = images, columns = newtrain columns).
new <- as.matrix(newtrain)
# For each of the FIRST 30 columns of `new` only (not every pixel column),
# record the row index of the first image attaining the column minimum / maximum.
# which.min() / which.max() return just the first such row, so ties are not listed.
# Both results are stored as 30 x 1 numeric matrices.
minit <- matrix(
  vapply(1:30, function(j) which.min(new[, j]), numeric(1)),
  nrow = 30
)
maxit <- matrix(
  vapply(1:30, function(j) which.max(new[, j]), numeric(1)),
  nrow = 30
)
# Plot only the flagged images: 30 "min" rows and 30 "max" rows, 60 plots in
# total on a 3 x 3 grid, waiting for ENTER between pages.
par(mfrow = c(3, 3), ask = TRUE)
for (i in seq_len(30)) {
  # Row flagged as the column minimum; its row number is shown as the x label.
  image(
    x = 1:28,
    y = 1:28,
    z = newim[minit[i], , ],
    col = gray(0:255 / 255),
    xlab = minit[i]
  )
  # Row flagged as the column maximum, labelled the same way.
  image(
    x = 1:28,
    y = 1:28,
    z = newim[maxit[i], , ],
    col = gray(0:255 / 255),
    xlab = maxit[i]
  )
}
# Digit labels of the rows flagged as minima.
mylabel[minit]
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# Digit labels of the training rows whose indices are stored in maxit.
mylabel[maxit]
## [1] 1 1 1 1 7 7 9 7 9 7 7 7 7 7 7 7 7 7 7 7 7 7 7 1 1 1 1 1 1 1
### One technique you might want to try is data augmentation: adding copies of the images that have been slightly warped or otherwise transformed to the dataset. In this way, you deliberately introduce variation (error) that the model can learn to handle. But I am not going to do that here.
library(nnet) #neural net library
library(e1071)
## Warning: package 'e1071' was built under R version 3.3.3
library(caret)
## Warning: package 'caret' was built under R version 3.3.3
## Loading required package: lattice
## Loading required package: ggplot2
# Seed the RNG so the pseudo-random number stream (and hence the fitted
# models) is reproducible. The original `set.seed=1234` only created a variable
# called `set.seed`; it never called the function, so nothing was seeded.
set.seed(1234)
tn <- read.csv("train.csv") # raw training data, label column included
test <- read.csv("test.csv") # test data
tn$label <- factor(tn$label) # classification target must be a factor
#tmodel=tune.nnet(label~., data=tn, size=1:10, MaxNWts=100000, trace=TRUE, maxit=200)
# Single hidden layer with 4 units, at most 200 iterations. MaxNWts is the cap
# on the number of weights; the fit reports "# weights: 3190", so 3200 is just
# enough for this network.
mynn <- nnet(
  label ~ .,
  data = tn,
  size = 4,
  maxit = 200,
  MaxNWts = 3200,
  trace = TRUE
)
## # weights: 3190
## initial value 101665.709387
## iter 10 value 74067.250609
## iter 20 value 72228.813799
## iter 30 value 71828.742134
## iter 40 value 68718.559163
## iter 50 value 64921.714329
## iter 60 value 62250.103581
## iter 70 value 60718.154658
## iter 80 value 59293.646990
## iter 90 value 58942.950135
## iter 100 value 58767.553394
## iter 110 value 58174.810657
## iter 120 value 57034.228503
## iter 130 value 56814.670134
## iter 140 value 56517.634644
## iter 150 value 55150.652660
## iter 160 value 54940.333479
## iter 170 value 54875.420881
## iter 180 value 54706.036880
## iter 190 value 54406.288739
## iter 200 value 53228.702193
## final value 53228.702193
## stopped after 200 iterations
# Score the network on the data it was trained on (resubstitution accuracy,
# not a held-out estimate).
# predict(type = "class") returns a character vector, and classes the network
# never predicts are missing from it. Converting to a factor with the same
# levels as the reference keeps the confusion matrix square and avoids the
# "levels are not in the same order" warnings (an error in newer caret).
mypredict <- factor(
  predict(mynn, tn, type = "class"),
  levels = levels(tn$label)
)
nntable <- table(mypredict, tn$label)
confusionMatrix(mypredict, tn$label)
## Warning in levels(reference) != levels(data): longer object length is not a
## multiple of shorter object length
## Warning in confusionMatrix.default(mypredict, tn$label): Levels are not in
## the same order for reference and data. Refactoring data to match.
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1 2 3 4 5 6 7 8 9
## 0 3864 59 2933 742 18 2491 728 29 2647 51
## 1 2 4556 178 311 54 288 44 191 934 124
## 2 0 0 0 0 0 0 0 0 0 0
## 3 126 59 422 3121 0 833 1 23 251 45
## 4 3 2 216 41 3449 53 219 320 149 3451
## 5 0 0 0 1 0 0 0 0 0 0
## 6 135 0 374 40 521 102 3144 72 64 187
## 7 2 8 54 94 29 28 1 3766 18 330
## 8 0 0 0 1 0 0 0 0 0 0
## 9 0 0 0 0 1 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.5214
## 95% CI : (0.5166, 0.5262)
## No Information Rate : 0.1115
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.4674
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 0 Class: 1 Class: 2 Class: 3 Class: 4
## Sensitivity 0.93514 0.9727 0.00000 0.71731 0.84700
## Specificity 0.74390 0.9430 1.00000 0.95325 0.88257
## Pos Pred Value 0.28491 0.6818 NaN 0.63942 0.43642
## Neg Pred Value 0.99058 0.9964 0.90055 0.96686 0.98173
## Prevalence 0.09838 0.1115 0.09945 0.10360 0.09695
## Detection Rate 0.09200 0.1085 0.00000 0.07431 0.08212
## Detection Prevalence 0.32290 0.1591 0.00000 0.11621 0.18817
## Balanced Accuracy 0.83952 0.9579 0.50000 0.83528 0.86479
## Class: 5 Class: 6 Class: 7 Class: 8 Class: 9
## Sensitivity 0.000e+00 0.75997 0.85571 0.000e+00 0.000e+00
## Specificity 1.000e+00 0.96052 0.98500 1.000e+00 1.000e+00
## Pos Pred Value 0.000e+00 0.67773 0.86975 0.000e+00 0.000e+00
## Neg Pred Value 9.096e-01 0.97342 0.98314 9.033e-01 9.003e-01
## Prevalence 9.036e-02 0.09850 0.10479 9.674e-02 9.971e-02
## Detection Rate 0.000e+00 0.07486 0.08967 0.000e+00 0.000e+00
## Detection Prevalence 2.381e-05 0.11045 0.10310 2.381e-05 2.381e-05
## Balanced Accuracy 5.000e-01 0.86024 0.92036 5.000e-01 5.000e-01
# Build the submission for the neural net: one row per test image, numbered
# from 1, with the predicted class. The row count comes from `test` instead of
# a hard-coded 28000. stringsAsFactors = FALSE keeps Label a character column,
# as before.
submitframe <- data.frame(
  ImageId = seq_len(nrow(test)),
  Label = predict(mynn, test, type = "class"),
  stringsAsFactors = FALSE
)
write.csv(submitframe, "firstsubmit.csv", row.names = FALSE)
library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
## The following object is masked from 'package:EBImage':
##
## combine
#library(party)
# Random forest on every pixel column: 100 trees, 10 candidate variables per
# split, each tree capped at 10 terminal nodes, no variable-importance pass.
myrf <- randomForest(
  label ~ .,
  data = tn,
  ntree = 100,
  mtry = 10,
  maxnodes = 10,
  importance = FALSE
)
#mycf = cforest(label ~ ., data = tn, controls=cforest_unbiased(ntree=1000, mtry=10)) #party package
# Print the fit summary (OOB error estimate and confusion matrix).
myrf
##
## Call:
## randomForest(formula = label ~ ., data = tn, ntree = 100, mtry = 10, maxnodes = 10, importance = FALSE)
## Type of random forest: classification
## Number of trees: 100
## No. of variables tried at each split: 10
##
## OOB estimate of error rate: 31.44%
## Confusion matrix:
## 0 1 2 3 4 5 6 7 8 9 class.error
## 0 3921 24 10 53 12 2 56 24 26 4 0.05106486
## 1 1 4637 24 8 1 0 3 8 2 0 0.01003416
## 2 192 561 2715 140 101 4 200 197 34 33 0.35001197
## 3 136 418 137 3257 26 4 52 164 52 105 0.25143645
## 4 45 260 14 21 2716 1 165 284 14 552 0.33300589
## 5 360 688 43 1357 122 475 145 231 142 232 0.87483531
## 6 217 428 34 46 91 6 3251 40 8 16 0.21416485
## 7 24 284 37 6 54 0 5 3901 2 88 0.11361054
## 8 97 1014 218 589 73 6 163 115 1567 221 0.61432439
## 9 62 312 14 82 504 0 53 786 18 2357 0.43720153
# Build and write the random-forest submission.
# The original stored the forest predictions in `rfsubmit` (as an extra
# lower-case `label` column) but then renamed and wrote `submitframe`, so
# secondsubmit.csv contained the neural-net predictions again.
rfsubmit <- data.frame(
  ImageId = seq_len(nrow(test)),
  Label = predict(myrf, test, type = "response"), # predicted class per image
  stringsAsFactors = FALSE
)
write.csv(rfsubmit, "secondsubmit.csv", row.names = FALSE)
###You will need to run this to install mxnet###
#cran <- getOption("repos")
#cran["dmlc"] <- "https://s3-us-west-2.amazonaws.com/apache-mxnet/R/CRAN/"
#options(repos = cran)
#install.packages("mxnet",dependencies = T)
library(mxnet)
## Warning: package 'mxnet' was built under R version 3.4.0
library(mlbench)
## Warning: package 'mlbench' was built under R version 3.3.3
mx.set.seed(0) # reproducible weight initialisation
# Predictors: every column except the label, scaled from 0-255 to 0-1.
# Unscaled inputs left training accuracy stuck near 0.2.
# NOTE: anything later passed to predict() on this model must be scaled the same way.
X <- as.matrix(tn[, -1]) / 255
# tn$label is a factor at this point; go through character so the digits 0-9
# are recovered rather than the internal factor codes 1-10.
y <- as.numeric(as.character(tn[, 1]))
# One hidden layer of 100 tanh units and a softmax output with one node per
# digit class (10; the original asked for 100 output nodes).
# array.layout is set explicitly because X has one image per row, which is
# what the auto-detection warning chose anyway.
model <- mx.mlp(
  X, y,
  hidden_node = 100,
  out_node = 10,
  activation = "tanh",
  out_activation = "softmax",
  num.round = 20,
  array.layout = "rowmajor",
  learning.rate = 0.1,
  momentum = 0.9,
  eval.metric = mx.metric.accuracy
)
## Warning in mx.model.select.layout.train(X, y): Auto detect layout of input matrix, use rowmajor..
## Start training with 1 devices
## [1] Train-accuracy=0.31176162347561
## [2] Train-accuracy=0.259403495440729
## [3] Train-accuracy=0.213098404255319
## [4] Train-accuracy=0.176481762917933
## [5] Train-accuracy=0.199966755319149
## [6] Train-accuracy=0.238316869300912
## [7] Train-accuracy=0.243992211246201
## [8] Train-accuracy=0.223594224924012
## [9] Train-accuracy=0.213407104863222
## [10] Train-accuracy=0.197972074468085
## [11] Train-accuracy=0.179948708206687
## [12] Train-accuracy=0.197188449848024
## [13] Train-accuracy=0.197758358662614
## [14] Train-accuracy=0.195265007598784
## [15] Train-accuracy=0.189043503039514
## [16] Train-accuracy=0.205737082066869
## [17] Train-accuracy=0.206212006079027
## [18] Train-accuracy=0.208847834346505
## [19] Train-accuracy=0.209417743161094
## [20] Train-accuracy=0.210984992401216