2. Model Building
- The dataset is already scaled to the [-1, 1] range, corresponding to [white, black].
- Pre-process using PCA: reduce the 256 pixel features to principal components first, then build the classifier on the transformed data.
library(caret)
preProcValues <- preProcess(train[,-1], method = "pca")
# transform the training set
transformed_train <- predict(preProcValues, train[,-1])
transformed_train$response <- train[,1]
# convert the response to a categorical factor with levels 0-9
transformed_train$response <- as.factor(transformed_train$response)
# transform the test set with the same PCA loadings
transformed_test <- predict(preProcValues, test[,-1])
transformed_test$response <- test[,1]
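By default, preProcess() with method = "pca" keeps enough components to explain 95% of the variance, which is where the 107 predictors in the model summaries below come from. A quick check, using only the preProcValues object created above:
# printing the object reports the variance threshold and component count
preProcValues
# one column of the rotation matrix per retained principal component
ncol(preProcValues$rotation)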
2.1 “Polynomial” Kernel with Tuning Parameters
- Tuning parameters: C and degree, with scale held at 1.
tune.poly <- expand.grid(C= c(0.1,1,10,100), degree=c(1,2,3) ,scale= 1)
my_Control <- trainControl(method="cv",number=5)
set.seed(123)
sel.poly <- train(response ~ . , data= transformed_train,method="svmPoly",trControl=my_Control,tuneGrid=tune.poly)
# drop the response column (the PCA output has 107 PCs plus the response, not 257 columns)
caret_poly_predict <- predict(sel.poly, transformed_test[, names(transformed_test) != "response"])
Accuracy on the test set:
mean(caret_poly_predict == transformed_test$response)
## [1] 0.9402093
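Beyond overall accuracy, a per-digit breakdown shows which digits the tuned polynomial SVM confuses. A minimal sketch with caret's confusionMatrix(), after coercing the test labels to a factor with the same levels as the predictions:
truth <- factor(transformed_test$response, levels = levels(caret_poly_predict))
confusionMatrix(caret_poly_predict, truth)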
Model Summary
sel.poly
## Support Vector Machines with Polynomial Kernel
##
## 7291 samples
## 107 predictor
## 10 classes: '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 5833, 5832, 5832, 5834, 5833
## Resampling results across tuning parameters:
##
## C degree Accuracy Kappa
## 0.1 1 0.9502098 0.9441941
## 0.1 2 0.9691406 0.9654179
## 0.1 3 0.9255237 0.9162976
## 1.0 1 0.9443133 0.9375773
## 1.0 2 0.9688664 0.9651101
## 1.0 3 0.9255237 0.9162976
## 10.0 1 0.9433535 0.9365008
## 10.0 2 0.9688664 0.9651101
## 10.0 3 0.9255237 0.9162976
## 100.0 1 0.9433535 0.9365023
## 100.0 2 0.9688664 0.9651101
## 100.0 3 0.9255237 0.9162976
##
## Tuning parameter 'scale' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were degree = 2, scale = 1 and C = 0.1.
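caret can also plot the resampling profile directly, which makes the tuning pattern above (degree = 2 best; degrees 2 and 3 insensitive to C) easy to see at a glance:
# cross-validated accuracy vs. C, one curve per polynomial degree
plot(sel.poly)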
2.2 “Radial” Kernel with Tuning Parameters
- Tuning parameters: C and sigma.
tune.radial <- expand.grid(C= c(0.01, 0.1, 1, 5, 10), sigma = c(0.001, 0.01, 0.1, 1, 5))
my_Control <- trainControl(method="cv",number=5)
set.seed(123)
sel.radial <- train(response ~ . , data= transformed_train,method="svmRadial",trControl=my_Control,tuneGrid=tune.radial)
# drop the response column, as above
caret_radial_predict <- predict(sel.radial, transformed_test[, names(transformed_test) != "response"])
Accuracy on the test set:
mean(caret_radial_predict == transformed_test$response)
## [1] 0.9297459
Model Summary after Tuning Parameters
sel.radial
## Support Vector Machines with Radial Basis Function Kernel
##
## 7291 samples
## 107 predictor
## 10 classes: '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 5833, 5832, 5832, 5834, 5833
## Resampling results across tuning parameters:
##
## C sigma Accuracy Kappa
## 0.01 0.001 0.1637635 0.0000000000
## 0.01 0.010 0.2988611 0.1650721756
## 0.01 0.100 0.2298704 0.0809388289
## 0.01 1.000 0.1637635 0.0000000000
## 0.01 5.000 0.1637635 0.0000000000
## 0.10 0.001 0.5654917 0.4957597232
## 0.10 0.010 0.8237535 0.8021227766
## 0.10 0.100 0.2833618 0.1461871873
## 0.10 1.000 0.1637635 0.0000000000
## 0.10 5.000 0.1637635 0.0000000000
## 1.00 0.001 0.9470577 0.9406707583
## 1.00 0.010 0.9429413 0.9360605311
## 1.00 0.100 0.3587982 0.2406672474
## 1.00 1.000 0.2116296 0.0586358464
## 1.00 5.000 0.1637635 0.0000000000
## 5.00 0.001 0.9602243 0.9554333240
## 5.00 0.010 0.9488399 0.9426749094
## 5.00 0.100 0.3779995 0.2646527724
## 5.00 1.000 0.2178004 0.0662018849
## 5.00 5.000 0.1640379 0.0003365733
## 10.00 0.001 0.9615955 0.9569673134
## 10.00 0.010 0.9489771 0.9428286455
## 10.00 0.100 0.3779995 0.2646527724
## 10.00 1.000 0.2178004 0.0662018849
## 10.00 5.000 0.1640379 0.0003365733
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.001 and C = 10.
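Because set.seed(123) is called before each train(), both SVMs were resampled on the same 5 folds, so their cross-validation results can be compared head-to-head with caret's resamples():
# paired comparison of the two tuned SVMs on the shared CV folds
res <- resamples(list(Poly = sel.poly, Radial = sel.radial))
summary(res)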
2.3 Deep Learning: MLP (Multilayer Perceptron)
library(keras)
train <- read.table("train.txt", header = F)
test <- read.table("test.txt", header = F)
x_train <- as.matrix(train[, 2:257])
y_train <- train[, 1]
x_test <- as.matrix(test[, 2:257])
y_test <- test[, 1]
# one-hot encode the digit labels (0-9) for the softmax output layer
y_train <- to_categorical(y_train, 10)
y_test <- to_categorical(y_test, 10)
Step 1: Define the Model
model <- keras_model_sequential()
model %>%
  layer_dense(units = 256, activation = 'relu', input_shape = c(256)) %>%
  layer_dropout(rate = 0.4) %>%
  layer_dense(units = 128, activation = 'relu') %>%
  layer_dropout(rate = 0.3) %>%
  layer_dense(units = 64, activation = 'relu') %>%
  layer_dropout(rate = 0.25) %>%
  layer_dense(units = 10, activation = 'softmax')
Step 2: Compile the Model
model %>% compile(
  loss = 'categorical_crossentropy',
  optimizer = 'adam',
  metrics = c('accuracy')
)
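A quick sanity check before training: summary(model) prints each layer's output shape and parameter count (the first dense layer alone holds 256 x 256 + 256 = 65,792 weights):
summary(model)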
Step 3: Fit to the Training Data
history <- fit(
  object = model,
  x = x_train,
  y = y_train,
  batch_size = 50,
  epochs = 35,
  validation_split = 0.3
)
print(history)
## Trained on 5,103 samples (batch_size=50, epochs=35)
## Final epoch (plot to see history):
## loss: 0.02772
## acc: 0.992
## val_loss: 0.1451
## val_acc: 0.9698
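As the printout suggests ("plot to see history"), plotting the history object draws the per-epoch loss and accuracy curves for the training and validation splits, the easiest way to see the gap between training accuracy (0.992) and validation accuracy (0.970):
plot(history)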
yhat_keras_class_vec <- predict_classes(object = model, x = x_test) %>%
  as.vector()
Accuracy of the MLP on the test set:
mean(yhat_keras_class_vec == test[,1])
## [1] 0.9392128
2.4 CNN (Convolutional Neural Network)
train <- read.table("train.txt", header = F)
test <- read.table("test.txt", header = F)
train <- data.matrix(train)
test <- data.matrix(test)
# reshape the 256 pixel columns into a 4-D array of 16x16 grayscale images
x_train_cnn <- array_reshape(train[, 2:257], c(nrow(train), 16, 16, 1))
# one-hot encode the digit labels
y_train_cnn <- to_categorical(train[, 1], 10)
model <- keras_model_sequential() %>%
  layer_conv_2d(filters = 32, kernel_size = c(3, 3), activation = "relu",
                input_shape = c(16, 16, 1)) %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_flatten() %>%
  layer_dropout(rate = 0.5) %>%
  layer_dense(units = 64, activation = "relu") %>%
  layer_dense(units = 10, activation = "softmax")
model %>% compile(
  loss = 'categorical_crossentropy',
  optimizer = 'adam',
  metrics = c('accuracy')
)
history <- model %>% fit(
  x_train_cnn, y_train_cnn,
  epochs = 10,
  batch_size = 32
)
history
## Trained on 7,291 samples (batch_size=32, epochs=10)
## Final epoch (plot to see history):
## loss: 0.07905
## acc: 0.9764
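Unlike the MLP fit above, this run holds out no validation data, so the 0.9764 training accuracy says nothing about generalization during training. A variant worth trying (the 20% holdout fraction here is an arbitrary choice, not from the original run):
history <- model %>% fit(
  x_train_cnn, y_train_cnn,
  epochs = 10,
  batch_size = 32,
  validation_split = 0.2
)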
x_test_cnn <- array_reshape(test[, 2:257], c(nrow(test), 16, 16, 1))
yhat_keras_class_vec <- predict_classes(object = model, x = x_test_cnn) %>%
  as.vector()
Accuracy of the CNN on the test set:
mean(yhat_keras_class_vec == test[,1])
## [1] 0.9586447
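A base-R cross-tabulation of true vs. predicted digits shows where the remaining ~4% of test errors concentrate, using the vectors already computed above:
# rows: true digit, columns: predicted digit
table(actual = test[,1], predicted = yhat_keras_class_vec)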