Data on handwritten zip-code digits from the United States Postal Service is divided into the data sets train (7,291 observations) and test (2,007 observations). Of the 257 columns in both data sets, the first contains the known label, which is simply the identity of each handwritten image, from 0 to 9. The remaining 256 columns represent, for each digit, a 16x16 greyscale map flattened into a vector of length 256, on the scale [-1, 1] corresponding to [black, white]. The map is obtained by subdividing the digit entry box into a 16x16 grid and then scanning.

I aim to practice parameter tuning for SVMs with different kernels using the caret package. I also fit an MLP neural network (and, later, a CNN) with Keras to compare accuracy against the tuned SVMs.

library(keras)
library(caret)
library(MLmetrics)
library(kernlab)

1. Import Data

train <- read.table("train.txt", header = F)
test <- read.table("test.txt", header = F)
dim(train)
## [1] 7291  257
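
As a quick sanity check, one row can be reshaped back into its 16x16 map and plotted. This is a minimal sketch; byrow = TRUE assumes row-major flattening, so if the digit appears transposed, flip that flag.

digit <- matrix(unlist(train[1, -1]), nrow = 16, ncol = 16, byrow = TRUE)
image(t(apply(digit, 2, rev)), col = grey(seq(0, 1, length.out = 256)),
      axes = FALSE, main = paste("Label:", train[1, 1]))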
library(doParallel)
cl <- makeCluster(2)        # parallel backend with 2 workers
registerDoParallel(cl)      # caret will use it for the cross-validation folds
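
Once the heavy model fits below are done, the workers can be released and caret returned to sequential execution:

stopCluster(cl)
registerDoSEQ()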

2. Model Building

# PCA on the 256 pixel columns; by default caret keeps enough components
# to explain 95% of the variance (thresh = 0.95)
preProcValues <- preProcess(train[,-1], method = "pca")
# apply the rotation to the training pixels
transformed_train <- predict(preProcValues, train[,-1])
transformed_train$response <- train[,1]
# convert the response to a factor with levels 0-9 for classification
transformed_train$response <- as.factor(transformed_train$response)
# apply the same training-fitted PCA rotation to the test set (no re-fitting)
transformed_test <- predict(preProcValues, test[,-1])
transformed_test$response <- test[,1]
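
PCA reduces the 256 pixel columns to however many components the 95% cutoff requires (the model summaries below report 107 predictors); a quick dimension check:

dim(transformed_train)   # rows, retained components + response column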

2.1 “Polynomial” Kernel with Tuning Parameters

  • Tune over C and degree, with scale held at 1.
tune.poly <- expand.grid(C = c(0.1, 1, 10, 100), degree = c(1, 2, 3), scale = 1)
my_Control <- trainControl(method = "cv", number = 5)
set.seed(123)
sel.poly <- train(response ~ ., data = transformed_train, method = "svmPoly",
                  trControl = my_Control, tuneGrid = tune.poly)
# the response is the last column of the transformed test frame
caret_poly_predict <- predict(sel.poly, transformed_test[, -ncol(transformed_test)])

Accuracy with Test Set.

mean(caret_poly_predict == transformed_test$response)
## [1] 0.9402093
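
Beyond overall accuracy, caret's confusionMatrix() breaks the errors down by digit; the response in the test frame is still numeric, so coerce it to a factor first:

confusionMatrix(caret_poly_predict, as.factor(transformed_test$response))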

Model Summary

sel.poly
## Support Vector Machines with Polynomial Kernel 
## 
## 7291 samples
##  107 predictor
##   10 classes: '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' 
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 5833, 5832, 5832, 5834, 5833 
## Resampling results across tuning parameters:
## 
##   C      degree  Accuracy   Kappa    
##     0.1  1       0.9502098  0.9441941
##     0.1  2       0.9691406  0.9654179
##     0.1  3       0.9255237  0.9162976
##     1.0  1       0.9443133  0.9375773
##     1.0  2       0.9688664  0.9651101
##     1.0  3       0.9255237  0.9162976
##    10.0  1       0.9433535  0.9365008
##    10.0  2       0.9688664  0.9651101
##    10.0  3       0.9255237  0.9162976
##   100.0  1       0.9433535  0.9365023
##   100.0  2       0.9688664  0.9651101
##   100.0  3       0.9255237  0.9162976
## 
## Tuning parameter 'scale' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were degree = 2, scale = 1 and C = 0.1.

2.2 “Radial” Kernel with Tuning Parameters

  • Tune over C and sigma.
tune.radial <- expand.grid(C = c(0.01, 0.1, 1, 5, 10), sigma = c(0.001, 0.01, 0.1, 1, 5))
my_Control <- trainControl(method = "cv", number = 5)
set.seed(123)
sel.radial <- train(response ~ ., data = transformed_train, method = "svmRadial",
                    trControl = my_Control, tuneGrid = tune.radial)
caret_radial_predict <- predict(sel.radial, transformed_test[, -ncol(transformed_test)])

Accuracy with Test Set.

mean(caret_radial_predict == transformed_test$response)
## [1] 0.9297459

Model Summary after Tuning Parameters

sel.radial
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 7291 samples
##  107 predictor
##   10 classes: '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' 
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 5833, 5832, 5832, 5834, 5833 
## Resampling results across tuning parameters:
## 
##   C      sigma  Accuracy   Kappa       
##    0.01  0.001  0.1637635  0.0000000000
##    0.01  0.010  0.2988611  0.1650721756
##    0.01  0.100  0.2298704  0.0809388289
##    0.01  1.000  0.1637635  0.0000000000
##    0.01  5.000  0.1637635  0.0000000000
##    0.10  0.001  0.5654917  0.4957597232
##    0.10  0.010  0.8237535  0.8021227766
##    0.10  0.100  0.2833618  0.1461871873
##    0.10  1.000  0.1637635  0.0000000000
##    0.10  5.000  0.1637635  0.0000000000
##    1.00  0.001  0.9470577  0.9406707583
##    1.00  0.010  0.9429413  0.9360605311
##    1.00  0.100  0.3587982  0.2406672474
##    1.00  1.000  0.2116296  0.0586358464
##    1.00  5.000  0.1637635  0.0000000000
##    5.00  0.001  0.9602243  0.9554333240
##    5.00  0.010  0.9488399  0.9426749094
##    5.00  0.100  0.3779995  0.2646527724
##    5.00  1.000  0.2178004  0.0662018849
##    5.00  5.000  0.1640379  0.0003365733
##   10.00  0.001  0.9615955  0.9569673134
##   10.00  0.010  0.9489771  0.9428286455
##   10.00  0.100  0.3779995  0.2646527724
##   10.00  1.000  0.2178004  0.0662018849
##   10.00  5.000  0.1640379  0.0003365733
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.001 and C = 10.
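
Note that the selected sigma = 0.001 sits at the lower edge of the grid and C = 10 at the upper edge, which suggests the optimum may lie outside the grid. A follow-up grid is a cheap sanity check; the values below are illustrative, not from the original analysis:

tune.radial2 <- expand.grid(C = c(10, 50, 100), sigma = c(0.0005, 0.001, 0.002))
set.seed(123)
sel.radial2 <- train(response ~ ., data = transformed_train, method = "svmRadial",
                     trControl = my_Control, tuneGrid = tune.radial2)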

2.3 Deep Learning MLP

train <- read.table("train.txt", header = F)
test <- read.table("test.txt", header = F)
x_train <- train[,2:257]
y_train <- train[,1]
x_test <- test[,2:257]
y_test <- test[,1]
x_train <- as.matrix(x_train)
x_test <- as.matrix(x_test)
# one-hot encode the 10 digit classes; to_categorical already returns a matrix
y_train <- to_categorical(y_train, 10)
y_test <- to_categorical(y_test, 10)

Step 1: Define the Model

model <- keras_model_sequential() 
model %>% 
  layer_dense(units = 256, activation = 'relu', input_shape = c(256)) %>% 
  layer_dropout(rate = 0.4) %>% 
  layer_dense(units = 128, activation = 'relu') %>%
  layer_dropout(rate = 0.3) %>%
  layer_dense(units = 64, activation = 'relu') %>%
  layer_dropout(rate = 0.25) %>%
  layer_dense(units = 10, activation = 'softmax')
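
Before compiling, summary(model) prints each layer's output shape and parameter count, a quick check that the architecture matches the intent:

summary(model)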

Step 2: Compile the Model

model %>% compile(
  loss = 'categorical_crossentropy',
  optimizer = 'adam',
  metrics = c('accuracy')
)

Step 3: Fit to Training Dataset

history <- fit(
  object           = model, 
  x                = as.matrix(x_train), 
  y                = y_train,
  batch_size       = 50,
  epochs           = 35,
  validation_split = 0.3
)
print(history)
## Trained on 5,103 samples (batch_size=50, epochs=35)
## Final epoch (plot to see history):
##     loss: 0.02772
##      acc: 0.992
## val_loss: 0.1451
##  val_acc: 0.9698
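
As the printout suggests, plot(history) draws the loss and accuracy curves for the training and validation splits:

plot(history)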
yhat_keras_class_vec <- predict_classes(object = model, x = as.matrix(x_test)) %>%
    as.vector()
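
Note: predict_classes() has been removed in recent releases of Keras/TensorFlow (2.6 onward). With a current installation, the equivalent is to take the arg-max of the predicted class probabilities; the same substitution applies to the CNN prediction in section 2.4:

probs <- predict(model, as.matrix(x_test))
yhat_keras_class_vec <- max.col(probs) - 1   # max.col is 1-based; classes are 0..9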

Accuracy of MLP

mean(yhat_keras_class_vec == test[,1])
## [1] 0.9392128

2.4 CNN (Convolutional Neural Network)

train <- read.table("train.txt", header = F)
test <- read.table("test.txt", header = F)
train <- data.matrix(train)
test <- data.matrix(test)
# reshape each 256-vector into a 16x16x1 array: the conv layers expect
# input of shape (samples, height, width, channels)
x_train_cnn <- array_reshape(train[, 2:257], c(nrow(train), 16, 16, 1))
# one-hot encode the 10 digit classes
y_train_cnn <- to_categorical(train[, 1], 10)
model <- keras_model_sequential() %>%
  layer_conv_2d(filters = 32, kernel_size = c(3, 3), activation = "relu",
                input_shape = c(16, 16, 1)) %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_flatten() %>%
  layer_dropout(rate=0.5) %>%
  layer_dense(units = 64, activation = "relu") %>%
  layer_dense(units = 10, activation = "softmax")
model %>% compile(
  loss = 'categorical_crossentropy',
  optimizer = 'adam',
  metrics = c('accuracy')
)
history <- model %>% fit(
  x_train_cnn, y_train_cnn,
  epochs = 10,
  batch_size = 32
)
history
## Trained on 7,291 samples (batch_size=32, epochs=10)
## Final epoch (plot to see history):
## loss: 0.07905
##  acc: 0.9764
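
One difference from the MLP run: no validation data is held out here, so only training metrics are reported. A sketch of the same fit with a held-out 30%, mirroring Step 3 above:

history <- model %>% fit(
  x_train_cnn, y_train_cnn,
  epochs = 10,
  batch_size = 32,
  validation_split = 0.3
)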
x_test_cnn <- array_reshape(data.matrix(test[, 2:257]), c(nrow(test), 16, 16, 1))
yhat_keras_class_vec <- predict_classes(object = model, x = x_test_cnn) %>%
    as.vector()

Accuracy of CNN

mean(yhat_keras_class_vec == test[,1])
## [1] 0.9586447
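
For reference, the test-set accuracies of the four models side by side (all from the runs above):

Model                    Test accuracy
Polynomial-kernel SVM    0.9402
Radial-kernel SVM        0.9297
MLP (Keras)              0.9392
CNN (Keras)              0.9586

The CNN, which exploits the 2-D structure of the pixel grid rather than treating the 256 pixels as independent features, gives the best test accuracy here.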