In this assignment, we build a very simple neural network model with one layer of neurons and no hidden layers. We also compare the efficiency of sequential and batch update methods.
Generate data for the training and test sets, adding normally distributed noise to a linear signal.
#generate training set
x1 = runif(1000, -1, 1)
x2 = runif(1000, -1, 1)
train = data.frame(x1, x2, y = 3 * x1 - 2 * x2 + rnorm(1000, sd = 0.3))
#generate test set
x1_2 = runif(1000, -1, 1)
x2_2 = runif(1000, -1, 1)
test = data.frame(x1_2, x2_2, y = 3 * x1_2 - 2 * x2_2 + rnorm(1000, sd = 0.3))
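As a sanity check (a sketch, not part of the analysis below), an ordinary least-squares fit with base R's lm should recover coefficients close to the true values 3 and -2:
#sanity check: OLS fit on the training data, coefficients expected near (0, 3, -2)
coef(lm(y ~ x1 + x2, data = train))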
Define the function adaline to optimize the weights of a simple neural network model. The function updates the weights sequentially, one observation at a time.
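In symbols, for one observation with input vector x (including a leading 1 for the intercept) and target y, the update implemented below is the delta rule, where the learning rate η corresponds to learn_rate:

$$w \leftarrow w + \eta \, (y - w^{\top} x) \, x$$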
adaline <- function(iv, dv, learn_rate = 0.01, inits = runif(ncol(iv) + 1, -0.5, 0.5)){
  n = length(dv)
  #design matrix with a leading column of ones for the intercept
  iv_matrix = data.frame(rep(1, n), iv)
  #one row of weights per update, starting from the random initial values
  weights = matrix(0, nrow = n + 1, ncol = length(inits))
  weights[1, ] = inits
  pred = numeric(n)
  error = numeric(n)
  for(i in 1:n){
    pred[i] = sum(weights[i, ] * iv_matrix[i, ])
    error[i] = dv[i] - pred[i]
    #delta rule: move the weights in the direction that reduces the current error
    weights[1 + i, ] = unlist(weights[i, ] + learn_rate * iv_matrix[i, ] * error[i])
  }
  return(list(pred = pred, error = error, weights = weights))
}
model = adaline(iv = train[, 1:2], dv = train[, 3])
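The last row of model$weights holds the weights after the final sequential update; they should end up near the true coefficients (intercept 0, then 3 and -2):
#weights after the final sequential update (expected to be near 0, 3, -2)
model$weights[nrow(model$weights), ]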
Define the function adaline_m to optimize the weights using a batch update (matrix computation), where every observation contributes to each weight update.
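With the full design matrix X (a leading column of ones plus the predictors) and target vector y, each batch iteration below updates all weights at once:

$$w \leftarrow w + \eta \, X^{\top} (y - X w)$$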
adaline_m <- function(iv, dv, n = 1000, learn_rate = 0.001, inits = runif(ncol(iv) + 1, -0.5, 0.5)){
  #n is the number of batch iterations (epochs)
  weight = t(as.matrix(inits))
  n_data = length(dv)
  #design matrix with a leading column of ones for the intercept
  iv_matrix = as.matrix(data.frame(rep(1, n_data), iv))
  weights = matrix(0, nrow = n, ncol = length(inits))
  mse = numeric(n)
  errors = numeric(n)
  for(i in 1:n){
    pred = as.vector(weight %*% t(iv_matrix))
    error = as.matrix(dv - pred)
    #batch update: accumulate the correction over all observations at once
    weight = weight + learn_rate * as.vector(t(iv_matrix) %*% error)
    weights[i, ] = weight
    mse[i] = mean(error ^ 2)
    errors[i] = mean(abs(error))
  }
  return(list(weights = weights, mse = mse, error = errors))
}
model_m = adaline_m(iv = train[,1:2], dv = train[,3])
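Similarly, the last row of model_m$weights gives the batch estimate of the weights:
#weights after the final batch iteration (expected to be near 0, 3, -2)
model_m$weights[nrow(model_m$weights), ]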
Sequential = system.time(adaline(iv = train[,1:2], dv = train[,3]))
Batch = system.time(adaline_m(iv = train[,1:2], dv = train[,3]))
knitr::kable(t(cbind(Sequential, Batch))[, 1:3])
| | user.self | sys.self | elapsed |
|---|---|---|---|
| Sequential | 3.33 | 0.00 | 4.17 |
| Batch | 0.22 | 0.02 | 0.25 |
The batch-update algorithm is substantially faster than the sequential one (times are in seconds).
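For a more stable comparison than a single system.time() call, one could repeat the timing several times, for example with the microbenchmark package (an assumption; it is not used in the original analysis):
#optional sketch: repeated timings with the microbenchmark package
library(microbenchmark)
microbenchmark(
  sequential = adaline(iv = train[, 1:2], dv = train[, 3]),
  batch = adaline_m(iv = train[, 1:2], dv = train[, 3]),
  times = 10
)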
plot(1:1000, model$error, type = "l", main = "Sequential update", xlab = "Training iteration", ylab = "Prediction error")
plot(1:1000, model_m$error, type = "l", main = "Batch update", xlab = "Training iteration", ylab = "Mean absolute error")
The error does not converge to zero because of the random noise added to the data.
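Since the noise has standard deviation 0.3, the error curves cannot drop much below the level implied by that noise: roughly 0.3^2 = 0.09 for the MSE, and roughly 0.3 * sqrt(2 / pi) ≈ 0.24 for the mean absolute error of normal noise. A quick check of these approximate floors:
#approximate lower bounds implied by the noise term (sd = 0.3)
0.3^2              #floor for the MSE
0.3 * sqrt(2 / pi) #floor for the mean absolute error (E|noise| for normal noise)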
Define a function to calculate the MSE of the predictions for a given set of weights.
mse <- function(weights, iv, dv){
  iv = as.matrix(iv)
  #predictions: weighted sum of each row of iv (iv already includes the intercept column)
  pred = rowSums(t(apply(iv, 1, function(x) weights * x)))
  mse = sum((dv - pred) ^ 2) / length(dv)
  return(mse)
}
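As a reference point, the MSE under the true generating weights (0, 3, -2) should be close to the noise variance of 0.09:
#MSE on the training set under the true weights; expected near 0.09
mse(weights = c(0, 3, -2), iv = data.frame(rep(1, 1000), train[, 1:2]), dv = train[, 3])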
#calculate MSE after each training iteration (sequential update)
mse_train = apply(model$weights, 1, mse, iv = data.frame(rep(1, 1000), train[, 1:2]), dv = train[, 3])
plot(0:1000, mse_train, type = "l", main = "MSE according to number of training iterations (training set, sequential update)", xlab = "Number of training iterations", ylab = "MSE")
#calculate MSE after each training iteration (batch update)
mse_train_m = apply(model_m$weights, 1, mse, iv = data.frame(rep(1, 1000), train[, 1:2]), dv = train[, 3])
plot(1:1000, mse_train_m, type = "l", main = "MSE according to number of training iterations (training set, batch update)", xlab = "Number of training iterations", ylab = "MSE")
The MSE on the training set generally decreases as the model is trained through more iterations, and it decreases much faster when the batch update method is used.
min_mse = which.min(mse_train_m)
optim_weight = model_m$weights[min_mse, ]
optim_weight = data.frame(optim_weight)
rownames(optim_weight) <- c("Bias (intercept)", "X1", "X2")
knitr::kable(optim_weight, col.names = c("Weights for variables"))
| | Weights for variables |
|---|---|
| Bias (intercept) | 0.0032802 |
| X1 | 2.9894181 |
| X2 | -1.9912247 |
The MSE for the training set using the optimal weights is 0.0851537.
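That figure is simply the minimum of the batch MSE curve; re-evaluating mse() at the optimal weights (an assumption about how it was computed) gives the same value:
#training-set MSE at the optimal weights
min(mse_train_m)
mse(weights = unlist(optim_weight), iv = data.frame(rep(1, 1000), train[, 1:2]), dv = train[, 3])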
Calculate the MSE on the test set using the weights from each training iteration, and plot the MSE against the number of training iterations.
mse_test = apply(model$weights, 1, mse, iv = data.frame(rep(1, 1000), test[, 1:2]), dv = test[, 3])
plot(0:1000, mse_test, type = "l", main = "MSE according to number of training iterations (test set, sequential update)", xlab = "Number of training iterations", ylab = "MSE")
mse_test_m = apply(model_m$weights, 1, mse, iv = data.frame(rep(1, 1000), test[, 1:2]), dv = test[, 3])
plot(1:1000, mse_test_m, type = "l", main = "MSE according to number of training iterations (test set, batch update)", xlab = "Number of training iterations", ylab = "MSE")
The MSE on the test set also generally decreases when weights from later training iterations are used. The MSE for the test set using the optimal weights is 2.7857413.
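As a follow-up check, the iteration at which the batch model achieves its lowest test-set MSE can be located the same way as for the training set:
#iteration with the lowest test-set MSE for the batch model, and that value
which.min(mse_test_m)
min(mse_test_m)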