Click here for other works of the author on RPubs

In this assignment, we build a classification model using multi-layer perceptron.

Load packages

library(knitr)
library(ggplot2)

1. Use the data provided (modeldata.txt). The first two columns are \(x_1\) and \(x_2\), the column 3 to 5 represent codings for three class (y).

Load data

data <- read.table("modeldata.txt")
colnames(data) <- c("X1", "X2", "Group1", "Group2", "Group3")

#show a part of the data
kable(head(data), digits = 4)
X1 X2 Group1 Group2 Group3
-0.0109 -0.1563 -1 -1 1
0.3338 -0.0011 -1 1 -1
-0.1366 -0.3341 -1 -1 1
0.2957 0.1368 -1 1 -1
-0.4264 -0.0864 1 -1 -1
0.7697 0.4290 -1 -1 1

Visualize data

#plot data according to group
p_data <- data.frame(data[, 1:2], apply(data[, 3:5], 1, which.max))
colnames(p_data) <- c("X1", "X2", "Group")
p_data$Group = factor(p_data$Group)
ggplot(p_data, aes(x = X1, y = X2, col = Group))+
    geom_point(alpha = 0.8, size = 2) +
    theme_bw() +
    labs(x = expression(x[1]), y = expression(x[2])) +
    coord_fixed(ratio = 1) +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

2. Write your own MLP. Use off-line learning, learning rate = 0.001, learning time = 1000 step, # of hidden neuron = 5, use tanh as your activation function. Plot MSE vs learning steps. Calculate the min(MSE) and associated optimal weights. (NOTE: you should try different initial conditions several times to check whether you get trapped in the local minimum.)

in progress…

Appendix - packages

Part 1: neuralnet package

The greatest strength of the neuralnet package is it allows for us to plot the neural network model to examine it visually.

Use package neuralnet to build neural network model

library(neuralnet)

nn <- neuralnet(Group1 + Group2 + Group3 ~ X1 + X2, data = data, hidden = 5)

show results

#plot with weights
plot(nn)

pred_nn <- compute(nn, data[, 1:2])

table(p_data$Group, apply(pred_nn$net.result, 1, which.max))
##    
##       1   2   3
##   1 166   0   1
##   2   0 219   5
##   3   2   7 200

Prettier plot

library(NeuralNetTools)
plotnet(nn, pos_col = "red", neg_col = "blue", alpha_val = 0.5)

Red lines indicates positive weights, blue for negative weights.

Part 2: H2O package

Created for buliding machine learning models, the H2O package offers users greater computing power by connecting to another remote machine. Neural network models often requires immense computing power, thus the package is very useful when building such models.

Build neural network model using H2O package

#load h2o package and connect to the ip it provides
library(h2o)

#connect to h2o server
h2o.init(nthreads = -1)
## Warning in .h2o.startJar(ip = ip, port = port, nthreads = nthreads, max_memory = max_mem_size, : You have a 32-bit version of Java. H2O works best with 64-bit Java.
## Please download the latest Java SE JDK 7 from the following URL:
## http://www.oracle.com/technetwork/java/javase/downloads/jdk7-downloads-1880260.html
#seperate data into training and test sets
train = p_data[1:500, ]
test = p_data[501:600, ]
    
#train a neural network model for classification
model = h2o.deeplearning(y = "Group", training_frame = as.h2o(train), validation_frame = as.h2o(test), activation = "Rectifier", hidden = 5, epochs = 6000, train_samples_per_iteration = -2, export_weights_and_biases = T, nfolds = 10, fold_assignment = "Stratified")

#predict groups using our neural network model
pred = h2o.predict(model, as.h2o(p_data[, 1:2]))

#weights of nn model
weight1 = as.data.frame(t(h2o.weights(model, 1)))
rownames(weight1) <- c("X1", "X2")
colnames(weight1) <- c("Hidden 1", "Hidden 2", "Hidden 3", "Hidden 4", "Hidden 5")
kable(weight1, digits = 4, caption = "Weights of first layer")

weight2 = as.data.frame(t(h2o.weights(model, 2)))
rownames(weight2) <- c("Hidden 1", "Hidden 2", "Hidden 3", "Hidden 4", "Hidden 5")
colnames(weight2) <- c("Class 1", "Class 2", "Class 3")
kable(weight2, digits = 4, caption = "Weights of second layer")

Confusion matrix for training set

conf_train <- table(train[, 3], as.data.frame(pred)$predict[1:500])
conf_train
##    
##       1   2   3
##   1 140   1   3
##   2   0 176   6
##   3   1   5 168

The overall accuracy of our predictions in the training set is 0.968.

Confusion matrix for test set

conf_test <- table(test[, 3], as.data.frame(pred)$predict[501:600])
conf_test
##    
##      1  2  3
##   1 23  0  0
##   2  0 39  3
##   3  0  0 35

The overall accuracy of our predictions in the test set is 0.97.

Accuracy diagnosis using 10-fold cross validation

#10-fold cross validation
model@model$cross_validation_metrics_summary
## Cross-Validation Metrics Summary: 
##                                mean          sd  cv_1_valid  cv_2_valid
## accuracy                 0.96434057 0.016589554   0.9807692   0.9268293
## err                     0.035659444 0.016589554  0.01923077  0.07317073
## err_count                       1.7   0.7106335         1.0         3.0
## logloss                  0.08635643  0.03273224  0.03930826  0.13041443
## max_per_class_error      0.07739029 0.034115613        0.05       0.125
## mean_per_class_accuracy   0.9625304 0.017848054  0.98333335   0.9398148
## mean_per_class_error    0.037469655 0.017848054 0.016666668 0.060185187
## mse                      0.02441893 0.009523669 0.009721552 0.044909593
## r2                       0.95948374 0.016554592   0.9840491     0.91649
## rmse                      0.1503083 0.030218758  0.09859793  0.21191883
##                          cv_3_valid  cv_4_valid  cv_5_valid  cv_6_valid
## accuracy                 0.95238096   0.9318182  0.94545454         1.0
## err                      0.04761905  0.06818182 0.054545455         0.0
## err_count                       2.0         3.0         3.0         0.0
## logloss                 0.068650424  0.14642644   0.1763936 0.048803084
## max_per_class_error      0.16666667  0.14285715  0.05882353         0.0
## mean_per_class_accuracy   0.9206349   0.9327731    0.945207         1.0
## mean_per_class_error     0.07936508 0.067226894  0.05479303         0.0
## mse                     0.020648217 0.044031717  0.04034232 0.009975656
## r2                        0.9604093  0.92818415  0.93657196    0.984327
## rmse                     0.14369488  0.20983736  0.20085397  0.09987821
##                          cv_7_valid  cv_8_valid  cv_9_valid cv_10_valid
## accuracy                  0.9818182  0.98333335   0.9591837   0.9818182
## err                     0.018181818 0.016666668 0.040816326 0.018181818
## err_count                       1.0         1.0         2.0         1.0
## logloss                  0.04758693  0.05573547  0.09901215  0.05123356
## max_per_class_error     0.055555556      0.0625      0.0625        0.05
## mean_per_class_accuracy   0.9814815   0.9791667  0.95955884  0.98333335
## mean_per_class_error    0.018518519 0.020833334 0.040441178 0.016666668
## mse                     0.013559352 0.015151465 0.030300405 0.015549032
## r2                        0.9786814   0.9756928   0.9549807  0.97545105
## rmse                    0.116444625  0.12309129  0.17407012 0.124695756

Plot error rate according to epoch for training and test sets

Test set is named “validation” in the diagram

#plot epoch according to classification error
plot(model)

Prepare data for plotting

grid <- as.data.frame(expand.grid(seq(min(data[, 1]), max(data[, 1]), length = 1000), seq(min(data[, 2]), max(data[, 2]), length = 1000)))
colnames(grid) <- c("X1", "X2")
pred <- h2o.predict(model, as.h2o(grid))
pred <- as.data.frame(pred)$predict

Plot the decision boundary of our neural network model

ggplot() +
    geom_raster(aes(x = grid[, 1],y = grid[, 2], fill = pred), alpha = 0.3, show.legend = F) +
    theme_bw() +
    geom_point(data = p_data, aes(x = X1, y = X2, color = Group), size = 2) + 
    labs(title = "Neural network decision boundary", x = expression(x[1]), y = expression(x[2])) +
    theme(panel.background = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank())