Objectives

The aim of this project is to develop a regression model using an A-NN. The data set consists of 5000 rows and 6 columns: (Y, X1, X2, X3, X4 and X5) where Y is the dependent variable (label), and X1, X2, X3, X4 and X5 are the independent variables.

Install required packages

library(forecast)
## Warning: package 'forecast' was built under R version 3.4.2
library(ggplot2)
library(RColorBrewer)
library(yuima)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: stats4
## Loading required package: expm
## Loading required package: Matrix
## 
## Attaching package: 'expm'
## The following object is masked from 'package:Matrix':
## 
##     expm
## Loading required package: cubature
## Loading required package: mvtnorm
## ############################################
## This is YUIMA Project package.
## Check for the latest development version at:
## http://R-Forge.R-Project.org/projects/yuima
## ############################################
## 
## Attaching package: 'yuima'
## The following object is masked from 'package:stats':
## 
##     simulate
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.4.2
## corrplot 0.84 loaded
library(MASS)
library(neuralnet)
library(NeuralNetTools)
library(knitr)
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 3.4.2
# RColorBrewer palettes: 6 colours from "Dark2" and 12 colours from "Paired"
colors <- brewer.pal(6, "Dark2")
colors2 <- brewer.pal(12, "Paired")

Get Data

# Load the project data; the first row of the CSV supplies the column names
myData <- read.csv(
  file = "/Users/ruthokoilu/Desktop/ALL_HW/Project5/OKOILU RUTH.csv",
  header = TRUE
)

# Exploratory look at myData: first rows, then per-column summary statistics
head(x = myData)
##           X1 X2          X3   X4            X5         Y
## 1 252.832437  4 0.017277474  896  3.295739e-27 1867.9412
## 2   1.135719  5 0.014051343 1400  1.586275e-04  286.3422
## 3  66.521609  4 0.042737765  896  4.259385e-33  606.0806
## 4  95.909546  5 0.018511172 1400  1.256261e-03  923.1026
## 5  82.959362  3 0.023961203  504 4.593689e-144  649.2804
## 6 109.970351  4 0.001079411  896  4.244908e-20  902.9684
summary(object = myData)
##        X1                X2              X3                  X4        
##  Min.   :-221.09   Min.   :1.000   Min.   :8.700e-07   Min.   :  56.0  
##  1st Qu.:  22.93   1st Qu.:2.000   1st Qu.:5.098e-03   1st Qu.: 224.0  
##  Median :  62.50   Median :3.000   Median :1.273e-02   Median : 504.0  
##  Mean   :  63.08   Mean   :3.017   Mean   :1.794e-02   Mean   : 623.4  
##  3rd Qu.: 103.07   3rd Qu.:4.000   3rd Qu.:2.503e-02   3rd Qu.: 896.0  
##  Max.   : 367.15   Max.   :5.000   Max.   :1.479e-01   Max.   :1400.0  
##        X5                 Y          
##  Min.   :0.000000   Min.   :-1241.0  
##  1st Qu.:0.000000   1st Qu.:  279.1  
##  Median :0.000000   Median :  546.4  
##  Mean   :0.013427   Mean   :  550.5  
##  3rd Qu.:0.000014   3rd Qu.:  829.6  
##  Max.   :3.483430   Max.   : 2648.6
dim(myData)
## [1] 5000    6
# Y is a continuous attribute, so the ANN is used here as a regression model

# Count the missing values across every column of myData
sum(is.na(myData))
## [1] 0

The data has 5000 rows and 6 columns. No missing values were recorded.

We are interested in the last row or last column of the plot below. X1 is largely positively correlated with Y.

# Single-panel layout, then plot the pairwise correlation matrix of all columns
par(mfrow = c(1, 1))
corrplot(corr = cor(x = myData))

Set Seed

Here, we’ll set seed to ensure that our experiment (ANN algorithm) gives a consistent and reproducible result every time we run it

# Fix the random number generator state so repeated runs give the same results
set.seed(seed = 12345)

Tasks

Task 1: Train a neural net with 6 input units and a single output unit.

Assume a number of hidden layers and units per layer and then train your neural net model using the cross validation method. That is, Split the data into 10 subsets of 500 observations each. Then combine subsets 1 to 9 to create the training data set, and use subset 10 for testing.

Scaling

Before we do cross validation, we will scale/normalize all variables (the predictors and the predicted variable Y) to values in the range [0, 1] using min-max scaling, i.e. $(x - \min) / (\max - \min)$. We are normalizing the variables so that all inputs will be at a comparable range and weights can be assigned without bias.

# Min-max scale every column of myData to [0, 1]: (x - min) / (max - min)
# NOTE(review): the minima/maxima are taken over the full data set, i.e. they
# also include the rows that are later held out as testing folds.
mins <- vapply(myData, min, numeric(1))
maxs <- vapply(myData, max, numeric(1))
myData_scaled <- as.data.frame(
  scale(myData, center = mins, scale = maxs - mins)
)

We will use the function generateNN(), which I’ve written. This function uses an existing ANN function called neuralnet() to create our ANN model nn_model from the (already scaled) training set, predicts on the testing data, converts the predictions back to the original scale of Y, and makes some additional calculations for the purpose of evaluating the model: the error $y - \hat{y}$, the SSE, the RMSE and R-squared.

# Fit a neuralnet regression model on `training` and score it on `testing`.
#
# Arguments:
#   training  data frame with a response column `Y` plus predictor columns;
#             every column other than `Y` is used as a predictor.
#   testing   data frame with the same columns as `training`.
#   h_layers  hidden-layer specification passed to neuralnet(hidden = ...),
#             e.g. 3 or c(4, 3).
#   y_min,    minimum / maximum of the response on its original scale, used to
#   y_max     undo the min-max scaling of `Y`. The defaults read the global
#             `myData`, which is what the original implementation did.
#
# Side effects: prints the first few residuals, the SSE, the RMSE and R-squared.
#
# Returns: an UNNAMED numeric vector c(SSE, RMSE, R-squared) computed on the
# unscaled response. It is deliberately left unnamed because the result tables
# built from it address the columns as X1, X2 and X3.
generateNN <- function(training, testing, h_layers,
                       y_min = min(myData$Y), y_max = max(myData$Y)) {
  stopifnot("Y" %in% names(training), "Y" %in% names(testing))

  # Model formula: Y against every other column of the training set
  headings <- names(training)
  predictors <- headings[!headings %in% "Y"]
  form <- as.formula(paste("Y ~", paste(predictors, collapse = " + ")))
  nn_model <- neuralnet(form, data = training, hidden = h_layers,
                        linear.output = TRUE)

  # The fitted network can be visualised with, for example:
  # plotnet(nn_model)

  # Predict on the testing predictors, selected by name rather than by
  # position so that the location of `Y` in the data frame does not matter.
  pred <- compute(nn_model, testing[, predictors, drop = FALSE])

  # Undo the min-max scaling so the errors relate to the actual values of Y
  testing_unscaled <- (testing$Y) * (y_max - y_min) + y_min
  pred_unscaled <- pred$net.result * (y_max - y_min) + y_min

  # Residuals (y - yhat)
  residual <- testing_unscaled - pred_unscaled
  print("First few rows or error")
  print(head(residual))

  # Sum of squared errors
  sse <- sum(residual^2)
  print(sse)

  # Root mean squared error
  rmse <- sqrt(mean(residual^2))
  print(rmse)

  # R squared: 1 - SSE / total sum of squares of the testing response
  rsquared <- 1 - (sum(residual^2) /
                     sum((testing_unscaled - mean(testing_unscaled))^2))
  print(rsquared)

  c(sse, rmse, rsquared)
}

Hidden Layer Variation ONE

We have randomly chosen 1 hidden layer with 3 units. This variation therefore consists of 5 units in the input layer, 1 hidden layer (3 units) and 1 output layer.

Evaluation for Sample variation 1 (Folds 1-9)

Determine the estimated values $\hat{y}$. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Hold out fold 10 (rows 4501-5000) for testing; train on folds 1-9
testing1 <- myData_scaled[-seq(1, 4500), ]
training1 <- myData_scaled[-seq(4501, 5000), ]
head(training1)
##          X1   X2          X3        X4            X5         Y
## 1 0.8056571 0.75 0.116838779 0.6250000  9.461190e-28 0.7992890
## 2 0.3777789 1.00 0.095021001 1.0000000  4.553774e-05 0.3926656
## 3 0.4889333 0.75 0.289022429 0.6250000  1.222756e-33 0.4748692
## 4 0.5388921 1.00 0.125182075 1.0000000  3.606392e-04 0.5563744
## 5 0.5168771 0.50 0.162039712 0.3333333 1.318726e-144 0.4859757
## 6 0.5627951 0.75 0.007294021 0.6250000  1.218600e-20 0.5511979
# Train with generateNN() using 1 hidden layer of 3 units
layer1 <- 3
var1_result1 <- generateNN(training1, testing1, layer1)
## [1] "First few rows or error"
##               [,1]
## 4501  -9.678580631
## 4502  16.566258486
## 4503  -6.006768962
## 4504  -1.506585070
## 4505 -13.290904631
## 4506 -14.504545390
## [1] 61698.65641
## [1] 11.10843431
## [1] 0.9993138049
#insert plot

The coefficient of determination (R2) is 0.9993138049. For a model that fits, R2 lies between 0 and 1: a value towards 0 means a bad fit, while a value towards 1 means a good fit. Here it is very close to 1, so we have a good fit.

Evaluation for Sample variation 2 (Folds 1-8 and 10)

Determine the estimated values $\hat{y}$. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Hold out fold 9 (rows 4001-4500) for testing; train on folds 1-8 and 10
testing2 <- myData_scaled[seq(4001, 4500), ]
training2 <- myData_scaled[-seq(4001, 4500), ]
head(training2)
##             X1   X2             X3           X4               X5
## 1 0.8056571169 0.75 0.116838779334 0.6250000000  9.461190206e-28
## 2 0.3777788765 1.00 0.095021001034 1.0000000000  4.553774491e-05
## 3 0.4889332840 0.75 0.289022429088 0.6250000000  1.222756055e-33
## 4 0.5388920559 1.00 0.125182075415 1.0000000000  3.606392252e-04
## 5 0.5168770614 0.50 0.162039711644 0.3333333333 1.318725834e-144
## 6 0.5627950794 0.75 0.007294021191 0.6250000000  1.218599943e-20
##              Y
## 1 0.7992889885
## 2 0.3926655682
## 3 0.4748691630
## 4 0.5563743736
## 5 0.4859756787
## 6 0.5511979467
# Train with generateNN() using 1 hidden layer of 3 units
layer1 <- 3
var1_result2 <- generateNN(training2, testing2, layer1)
## [1] "First few rows or error"
##               [,1]
## 4001  -6.164828865
## 4002  20.142466913
## 4003 -15.965555165
## 4004  11.169426958
## 4005   5.013561884
## 4006 -20.704476266
## [1] 71091.59731
## [1] 11.92405949
## [1] 0.9992708033

Evaluation for Sample variation 3 (Folds 1-7 and 9-10)

Determine the estimated values $\hat{y}$. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Hold out fold 8 (rows 3501-4000) for testing; train on folds 1-7 and 9-10
testing3 <- myData_scaled[seq(3501, 4000), ]
training3 <- myData_scaled[-seq(3501, 4000), ]
head(training3)
##             X1   X2             X3           X4               X5
## 1 0.8056571169 0.75 0.116838779334 0.6250000000  9.461190206e-28
## 2 0.3777788765 1.00 0.095021001034 1.0000000000  4.553774491e-05
## 3 0.4889332840 0.75 0.289022429088 0.6250000000  1.222756055e-33
## 4 0.5388920559 1.00 0.125182075415 1.0000000000  3.606392252e-04
## 5 0.5168770614 0.50 0.162039711644 0.3333333333 1.318725834e-144
## 6 0.5627950794 0.75 0.007294021191 0.6250000000  1.218599943e-20
##              Y
## 1 0.7992889885
## 2 0.3926655682
## 3 0.4748691630
## 4 0.5563743736
## 5 0.4859756787
## 6 0.5511979467
# Train with generateNN() using 1 hidden layer of 3 units
layer1 <- 3
var1_result3 <- generateNN(training3, testing3, layer1)
## [1] "First few rows or error"
##               [,1]
## 3501 -11.574625076
## 3502   0.382353603
## 3503  -6.890625905
## 3504   7.556536697
## 3505 -13.902973063
## 3506   3.747869868
## [1] 72883.11804
## [1] 12.07336888
## [1] 0.9992990731

Evaluation for Sample variation 4 (Folds 1-6 and 8-10)

Determine the estimated values $\hat{y}$. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Hold out fold 7 (rows 3001-3500) for testing; train on folds 1-6 and 8-10
testing4 <- myData_scaled[seq(3001, 3500), ]
training4 <- myData_scaled[-seq(3001, 3500), ]
head(training4)
##             X1   X2             X3           X4               X5
## 1 0.8056571169 0.75 0.116838779334 0.6250000000  9.461190206e-28
## 2 0.3777788765 1.00 0.095021001034 1.0000000000  4.553774491e-05
## 3 0.4889332840 0.75 0.289022429088 0.6250000000  1.222756055e-33
## 4 0.5388920559 1.00 0.125182075415 1.0000000000  3.606392252e-04
## 5 0.5168770614 0.50 0.162039711644 0.3333333333 1.318725834e-144
## 6 0.5627950794 0.75 0.007294021191 0.6250000000  1.218599943e-20
##              Y
## 1 0.7992889885
## 2 0.3926655682
## 3 0.4748691630
## 4 0.5563743736
## 5 0.4859756787
## 6 0.5511979467
# Train with generateNN() using 1 hidden layer of 3 units
layer1 <- 3
var1_result4 <- generateNN(training4, testing4, layer1)
## [1] "First few rows or error"
##              [,1]
## 3001  1.355642473
## 3002  4.069425300
## 3003  3.903711036
## 3004 14.654555347
## 3005 11.994187441
## 3006 16.490535235
## [1] 68369.21144
## [1] 11.69352055
## [1] 0.999228847

Evaluation for Sample variation 5 (Folds 1-5 and 7-10)

Determine the estimated values $\hat{y}$. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Training is folds 1-5 and 7-10 (all rows except 2501-3000)
# Testing is fold 6 (rows 2501-3000)
training5 <- myData_scaled[-c(2501:3000),]
testing5 <- myData_scaled[2501:3000,]
# head(training5)

# Train with generateNN() using 1 hidden layer of 3 units
layer1 <- 3
var1_result5 <- generateNN(training5, testing5, layer1)
## [1] "First few rows or error"
##              [,1]
## 2501 21.968223202
## 2502  3.469630502
## 2503 12.210576446
## 2504  5.380329289
## 2505 -2.224773773
## 2506 22.381846702
## [1] 63969.88052
## [1] 11.31104597
## [1] 0.9992648934

Evaluation for Sample variation 6 (Folds 1-4 and 6-10)

Determine the estimated values $\hat{y}$. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Training is folds 1-4 and 6-10 (all rows except 2001-2500)
# Testing is fold 5 (rows 2001-2500)
training6 <- myData_scaled[-c(2001:2500),]
testing6 <- myData_scaled[2001:2500,]
# head(training6)

# Train with generateNN() using 1 hidden layer of 3 units
layer1 <- 3
var1_result6 <- generateNN(training6, testing6, layer1)
## [1] "First few rows or error"
##              [,1]
## 2001 -2.164765324
## 2002 18.531900808
## 2003 -4.792088267
## 2004 17.525746546
## 2005 13.565403494
## 2006  7.212744325
## [1] 70583.32534
## [1] 11.88135727
## [1] 0.9992365871

Evaluation for Sample variation 7 (Folds 1-3 and 5-10)

Determine the estimated values $\hat{y}$. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Training is folds 1-3 and 5-10 (all rows except 1501-2000)
# Testing is fold 4 (rows 1501-2000)
training7 <- myData_scaled[-c(1501:2000),]
testing7 <- myData_scaled[1501:2000,]
# head(training7)

# Train with generateNN() using 1 hidden layer of 3 units
layer1 <- 3
var1_result7 <- generateNN(training7, testing7, layer1)
## [1] "First few rows or error"
##              [,1]
## 1501 -8.200838302
## 1502  7.661568225
## 1503  3.260839515
## 1504 -5.773825069
## 1505 -7.332024698
## 1506  5.430972162
## [1] 76100.7892
## [1] 12.33700038
## [1] 0.9992152766

Evaluation for Sample variation 8 (Folds 1-2 and 4-10)

Determine the estimated values $\hat{y}$. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Training is folds 1-2 and 4-10 (all rows except 1001-1500)
# Testing is fold 3 (rows 1001-1500)
training8 <- myData_scaled[-c(1001:1500),]
testing8 <- myData_scaled[1001:1500,]
# head(training8)

# Train with generateNN() using 1 hidden layer of 3 units
layer1 <- 3
var1_result8 <- generateNN(training8, testing8, layer1)
## [1] "First few rows or error"
##                 [,1]
## 1001 -11.80451220719
## 1002  15.91609297840
## 1003   0.08200110272
## 1004  12.59072800055
## 1005  28.26060635611
## 1006   6.87553599235
## [1] 68993.74849
## [1] 11.74680795
## [1] 0.9992283297

Evaluation for Sample variation 9 (Folds 1 and 3-10)

Determine the estimated values $\hat{y}$. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Training is folds 1 and 3-10 (all rows except 501-1000)
# Testing is fold 2 (rows 501-1000)
training9 <- myData_scaled[-c(501:1000),]
testing9 <- myData_scaled[501:1000,]
# head(training9)

# Train with generateNN() using 1 hidden layer of 3 units
layer1 <- 3
var1_result9 <- generateNN(training9, testing9, layer1)
## [1] "First few rows or error"
##              [,1]
## 501  -2.265391376
## 502  12.166059067
## 503  10.201680601
## 504  -5.934983147
## 505 -10.143396142
## 506  16.900475555
## [1] 63397.01531
## [1] 11.26028555
## [1] 0.9993273123

Evaluation for Sample variation 10 (Folds 2-10)

Determine the estimated values $\hat{y}$. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Hold out fold 1 (rows 1-500) for testing; train on folds 2-10
testing10 <- myData_scaled[seq(1, 500), ]
training10 <- myData_scaled[-seq(1, 500), ]
head(training10)
##               X1   X2            X3    X4
## 501 0.5459894805 1.00 0.02005542879 1.000
## 502 0.4566163897 0.00 0.26889002085 0.000
## 503 0.5259734019 0.25 0.14566084638 0.125
## 504 0.5925456380 1.00 0.31385720396 1.000
## 505 0.5141328910 0.25 0.01837543203 0.125
## 506 0.5327420100 0.00 0.12327029150 0.000
##                                            X5            Y
## 501 0.000000000000000413809199991554069750843 0.5612742000
## 502 0.000035095300171007091309238051790586610 0.4096830978
## 503 0.000012007498796669775309280125963340424 0.4873074890
## 504 0.000010244601835718024869579904911898183 0.6084799188
## 505 0.000000000000000000000000000000679562986 0.4698171104
## 506 0.107440311089807699618248193473846185952 0.4891882254
# Train with generateNN() using 1 hidden layer of 3 units
layer1 <- 3
var1_result10 <- generateNN(training10, testing10, layer1)
## [1] "First few rows or error"
##            [,1]
## 1  18.810246785
## 2   7.402942154
## 3 -22.010847078
## 4   5.429510314
## 5 -14.928208373
## 6 -16.690482379
## [1] 61876.33638
## [1] 11.12441786
## [1] 0.9992396382

Visualizing the ANN model - 5 inputs, 1 hidden layer with 3 units and 1 output layer

Results from ANN with 5 unit input layer, 1 hidden layer with 3 units and 1 output layer

# One row per testing fold (1 to 10) for the single-hidden-layer network
test_folds <- as.numeric(1:10)
one_hidden_layer <- rep(1, 10)

# var1_result10 was tested on fold 1, ..., var1_result1 on fold 10, so the
# results are stacked in that order to line up with test_folds
one_h_layer_result <- rbind(
  var1_result10, var1_result9, var1_result8, var1_result7, var1_result6,
  var1_result5, var1_result4, var1_result3, var1_result2, var1_result1
)

one_h_layer_result_df <- data.frame(one_h_layer_result)

# Columns X1, X2 and X3 of the stacked results hold SSE, RMSE and R2
result1 <- setNames(
  data.frame(
    test_folds,
    one_hidden_layer,
    one_h_layer_result_df[["X1"]],
    one_h_layer_result_df[["X2"]],
    one_h_layer_result_df[["X3"]]
  ),
  c("Testing folds", "No. of Hidden Layers", "SSE", "RMSE", "R2")
)
kable(result1)
Testing folds No. of Hidden Layers SSE RMSE R2
1 1 61876.33638 11.12441786 0.9992396382
2 1 63397.01531 11.26028555 0.9993273123
3 1 68993.74849 11.74680795 0.9992283297
4 1 76100.78920 12.33700038 0.9992152766
5 1 70583.32534 11.88135727 0.9992365871
6 1 63969.88052 11.31104597 0.9992648934
7 1 68369.21144 11.69352055 0.9992288470
8 1 72883.11804 12.07336888 0.9992990731
9 1 71091.59731 11.92405949 0.9992708033
10 1 61698.65641 11.10843431 0.9993138049

From the table above, it can be observed that the neural network trained on folds 1-9 produced the lowest SSE and RMSE when the resulting model was tested on the 10th fold (i.e. the last 500 rows). We record SSE = 61698.65641, RMSE = 11.10843431 and R2 = 0.9993138049. This R2 is close to 1, which means a good fit, although the largest R2 in the table (0.9993273123) belongs to testing fold 2. However, in cross-validation we are interested in the average of the prediction error, which gives a more accurate estimate of model performance, so the average SSE, RMSE and R2 (R-squared) for this experiment are calculated below: SSE = 67896.3678441038, RMSE = 11.6460298214 and R2 = 0.9992624566. This is a good fit.

# Mean SSE, RMSE and R2 (columns 3 to 5 of result1) across the ten folds
result1_ave <- vapply(result1[, 3:5], mean, numeric(1))
result1_ave
##              SSE             RMSE               R2 
## 67896.3678441038    11.6460298214     0.9992624566

Task 2: Vary the number of hidden layers and units per layer and repeat the above step in order to obtain the best model as judged by the metrics SSE, the root mean squared error, and R2.

Hidden Layer Variation TWO

We have randomly chosen 2 hidden layers with 4 and 3 units, respectively. This variation therefore consists of 5 units in the input layer, 2 hidden layers (4 and 3 units respectively) and 1 output layer.

Evaluation for Sample variation 1 (Folds 1-9)

Determine the estimated values $\hat{y}$. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Training is folds 1-9 (training1, created in variation ONE)
# Testing is fold 10 (testing1)

# Train with generateNN() using 2 hidden layers (4 and 3 units respectively)
layer2 <- c(4,3)
var2_result1 <- generateNN(training1, testing1, layer2)
## [1] "First few rows or error"
##                [,1]
## 4501 -11.2081033287
## 4502  11.9613949277
## 4503  -4.1331515796
## 4504   0.5488184631
## 4505 -12.3204782152
## 4506 -12.5556471169
## [1] 63273.8548
## [1] 11.24934263
## [1] 0.999296286
#insert plot

The coefficient of determination (R2) is 0.999296286. For a model that fits, R2 lies between 0 and 1: a value towards 0 means a bad fit, while a value towards 1 means a good fit. Here it is very close to 1, so we have a good fit.

Evaluation for Sample variation 2 (Folds 1-8 and 10)

Determine the estimated values $\hat{y}$. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Training is folds 1-8 and 10 (training2, created in variation ONE)
# Testing is fold 9 (testing2)

# Train with generateNN() using 2 hidden layers (4 and 3 units respectively)
layer2 <- c(4,3)
var2_result2 <- generateNN(training2, testing2, layer2)
## [1] "First few rows or error"
##               [,1]
## 4001  -4.685174298
## 4002  26.470992628
## 4003 -22.570744639
## 4004   3.650241075
## 4005   7.224301361
## 4006 -18.115297880
## [1] 112072.642
## [1] 14.97148236
## [1] 0.9988504548

Evaluation for Sample variation 3 (Folds 1-7 and 9-10)

Determine the estimated values $\hat{y}$. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Training is folds 1-7 and 9-10 (training3, created in variation ONE)
# Testing is fold 8 (testing3)

# Train with generateNN() using 2 hidden layers (4 and 3 units respectively)
layer2 <- c(4,3)
var2_result3 <- generateNN(training3, testing3, layer2)
## [1] "First few rows or error"
##               [,1]
## 3501 -17.322963632
## 3502  -1.536026500
## 3503  -2.962242041
## 3504  -5.079417455
## 3505  -2.612334064
## 3506   4.061875494
## [1] 70607.45317
## [1] 11.88338783
## [1] 0.9993209585

Evaluation for Sample variation 4 (Folds 1-6 and 8-10)

Determine the estimated values $\hat{y}$. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Training is folds 1-6 and 8-10 (training4, created in variation ONE)
# Testing is fold 7 (testing4)

# Train with generateNN() using 2 hidden layers (4 and 3 units respectively)
layer2 <- c(4,3)
var2_result4 <- generateNN(training4, testing4, layer2)
## [1] "First few rows or error"
##              [,1]
## 3001 -3.915405549
## 3002  3.417289375
## 3003  5.427432945
## 3004 16.161755992
## 3005 14.723318767
## 3006 16.887164824
## [1] 84999.95079
## [1] 13.03840104
## [1] 0.9990412648

Evaluation for Sample variation 5 (Folds 1-5 and 7-10)

Determine the estimated values $\hat{y}$. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Training is folds 1-5 and 7-10 (training5, created in variation ONE)
# Testing is fold 6 (testing5)


# Train with generateNN() using 2 hidden layers (4 and 3 units respectively)
layer2 <- c(4,3)
var2_result5 <- generateNN(training5, testing5, layer2)
## [1] "First few rows or error"
##              [,1]
## 2501 11.296758423
## 2502  1.341383191
## 2503  2.900145620
## 2504 12.270791464
## 2505 -4.487478111
## 2506 18.364105034
## [1] 61694.01541
## [1] 11.10801651
## [1] 0.9992910463

Evaluation for Sample variation 6 (Folds 1-4 and 6-10)

Determine the estimated values $\hat{y}$. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Training is folds 1-4 and 6-10 (training6, created in variation ONE)
# Testing is fold 5 (testing6)


# Train with generateNN() using 2 hidden layers (4 and 3 units respectively)
layer2 <- c(4,3)
var2_result6 <- generateNN(training6, testing6, layer2)
## [1] "First few rows or error"
##              [,1]
## 2001 -2.289204494
## 2002 18.385754281
## 2003 -6.334068336
## 2004  3.376054406
## 2005 13.062251121
## 2006  7.008280962
## [1] 108728.3428
## [1] 14.74641263
## [1] 0.9988240195

Evaluation for Sample variation 7 (Folds 1-3 and 5-10)

Determine the estimated values $\hat{y}$. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Training is folds 1-3 and 5-10 (training7, created in variation ONE)
# Testing is fold 4 (testing7)


# Train with generateNN() using 2 hidden layers (4 and 3 units respectively)
layer2 <- c(4,3)
var2_result7 <- generateNN(training7, testing7, layer2)
## [1] "First few rows or error"
##               [,1]
## 1501  -3.506555833
## 1502  12.381991871
## 1503   7.668513243
## 1504 -10.698216399
## 1505 -11.289329830
## 1506   0.223658176
## [1] 118341.6126
## [1] 15.38451251
## [1] 0.9987797048

Evaluation for Sample variation 8 (Folds 1-2 and 4-10)

Determine the estimated values $\hat{y}$. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Training is folds 1-2 and 4-10 (training8, created in variation ONE)
# Testing is fold 3 (testing8)


# Train with generateNN() using 2 hidden layers (4 and 3 units respectively)
layer2 <- c(4,3)
var2_result8 <- generateNN(training8, testing8, layer2)
## [1] "First few rows or error"
##               [,1]
## 1001 -9.6819453585
## 1002 11.8681119151
## 1003 -3.4102649786
## 1004 16.4210963311
## 1005 37.3532457963
## 1006  0.4409993736
## [1] 83473.00296
## [1] 12.92075872
## [1] 0.9990663845

Evaluation for Sample variation 9 (Folds 1 and 3-10)

Determine the estimated values $\hat{y}$. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Training is folds 1 and 3-10 (training9, created in variation ONE)
# Testing is fold 2 (testing9)


# Train with generateNN() using 2 hidden layers (4 and 3 units respectively)
layer2 <- c(4,3)
var2_result9 <- generateNN(training9, testing9, layer2)
## [1] "First few rows or error"
##             [,1]
## 501 -1.767205532
## 502 10.268800763
## 503 13.941112705
## 504 -9.121038058
## 505 -5.595790951
## 506  8.798694638
## [1] 84589.95444
## [1] 13.00691773
## [1] 0.9991024401

Evaluation for Sample variation 10 (Folds 2-10)

Determine the estimated values $\hat{y}$. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Training is folds 2-10 (training10, created in variation ONE)
# Testing is fold 1 (testing10)

# Train with generateNN() using 2 hidden layers (4 and 3 units respectively)
layer2 <- c(4,3)
var2_result10 <- generateNN(training10, testing10, layer2)
## [1] "First few rows or error"
##            [,1]
## 1 -12.676633856
## 2   7.740360308
## 3 -17.506900779
## 4   5.956160671
## 5 -11.580908402
## 6 -14.137902846
## [1] 57426.69446
## [1] 10.71696734
## [1] 0.9992943173

Visualizing the ANN model - 5 inputs, 2 hidden layers with 4 units and 3 units respectively, and 1 output layer

Results from ANN with a 5-unit input layer, 2 hidden layers with 4 units and 3 units respectively, and 1 output layer

# One row per testing fold (1 to 10) for the two-hidden-layer network
test_folds <- as.numeric(1:10)
two_hidden_layers <- rep(2, 10)

# var2_result10 was tested on fold 1, ..., var2_result1 on fold 10, so the
# results are stacked in that order to line up with test_folds
two_h_layer_result <- rbind(
  var2_result10, var2_result9, var2_result8, var2_result7, var2_result6,
  var2_result5, var2_result4, var2_result3, var2_result2, var2_result1
)

two_h_layer_result_df <- data.frame(two_h_layer_result)

# Columns X1, X2 and X3 of the stacked results hold SSE, RMSE and R2
result2 <- setNames(
  data.frame(
    test_folds,
    two_hidden_layers,
    two_h_layer_result_df[["X1"]],
    two_h_layer_result_df[["X2"]],
    two_h_layer_result_df[["X3"]]
  ),
  c("Testing folds", "No. of Hidden Layers", "SSE", "RMSE", "R2")
)
kable(result2)
Testing folds No. of Hidden Layers SSE RMSE R2
1 2 57426.69446 10.71696734 0.9992943173
2 2 84589.95444 13.00691773 0.9991024401
3 2 83473.00296 12.92075872 0.9990663845
4 2 118341.61256 15.38451251 0.9987797048
5 2 108728.34278 14.74641263 0.9988240195
6 2 61694.01541 11.10801651 0.9992910463
7 2 84999.95079 13.03840104 0.9990412648
8 2 70607.45317 11.88338783 0.9993209585
9 2 112072.64199 14.97148236 0.9988504548
10 2 63273.85480 11.24934263 0.9992962860

From the table above, it can be observed that the neural network trained on folds 2-10 produced the lowest SSE and RMSE when the resulting model was tested on the 1st fold. We record SSE = 57426.69446, RMSE = 10.71696734 and R2 = 0.9992943173. This R2 is close to 1, which means a good fit; the largest R2 in the table (0.9993209585) belongs to testing fold 8.

Calculating the average of all 10 fold cross-validation results (see below), we have SSE = 84520.7523355624, RMSE = 12.9026199298 and R2 = 0.9990866876. Adding one more hidden layer produced a model with a higher average SSE and RMSE than the previous experiment.

# Mean SSE, RMSE and R2 (columns 3 to 5 of result2) across the ten folds
result2_ave <- vapply(result2[, 3:5], mean, numeric(1))
result2_ave
##              SSE             RMSE               R2 
## 84520.7523355624    12.9026199298     0.9990866876

For the next experiment, let's use 3 hidden layers with 3 units, 5 units and 2 units respectively.

Hidden Layer Variation THREE

We have randomly chosen 3 hidden layers with 3, 5 and 2 units, respectively. This variation therefore consists of 5 units in the input layer, 3 hidden layers (3, 5 and 2 units respectively) and 1 output layer.

Evaluation for Sample variation 1 (Folds 1-9)

Determine the estimated values $\hat{y}$. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Training is folds 1-9 (training1, created in variation ONE)
# Testing is fold 10 (testing1)

# Train with generateNN() using 3 hidden layers (3, 5 and 2 units respectively)
layer3 <- c(3, 5, 2)
var3_result1 <- generateNN(training1, testing1, layer3)
## [1] "First few rows or error"
##                [,1]
## 4501  -6.4376523420
## 4502  19.0604425370
## 4503  -0.5032938382
## 4504   0.4345148600
## 4505 -12.8117134863
## 4506  -7.1652363515
## [1] 61677.27374
## [1] 11.10650924
## [1] 0.9993140427
#insert plot

The coefficient of determination (R2) is 0.9993140427. For a model that fits, R2 lies between 0 and 1: a value towards 0 means a bad fit, while a value towards 1 means a good fit. Here it is very close to 1, so we have a good fit.

Evaluation for Sample variation 2 (Folds 1-8 and 10)

Determine the estimated values $\hat{y}$. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Training is folds 1-8 and 10 (training2, created in variation ONE)
# Testing is fold 9 (testing2)


# Train with generateNN() using 3 hidden layers (3, 5 and 2 units respectively)
# layer3 keeps the value c(3, 5, 2) assigned for the first run of this variation
var3_result2 <- generateNN(training2, testing2, layer3)
## [1] "First few rows or error"
##               [,1]
## 4001  -4.856413534
## 4002  28.308269483
## 4003 -21.988791466
## 4004  -1.828107405
## 4005   8.702959457
## 4006 -18.433524234
## [1] 114595.7343
## [1] 15.13907093
## [1] 0.9988245751

Evaluation for Sample variation 3 (Folds 1-7 and 9-10)

Determine the estimated values $\hat{y}$. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Training is folds 1-7 and 9-10 (training3, created in variation ONE)
# Testing is fold 8 (testing3)


# Train with generateNN() using 3 hidden layers (3, 5 and 2 units respectively)

# layer3 keeps the value c(3, 5, 2) assigned for the first run of this variation
var3_result3 <- generateNN(training3, testing3, layer3)
## [1] "First few rows or error"
##                 [,1]
## 3501 -11.11461762898
## 3502  -0.01623973098
## 3503  -2.18081963118
## 3504  14.98947849173
## 3505 -12.10578856176
## 3506   0.91377193472
## [1] 136021.2492
## [1] 16.49371087
## [1] 0.9986918651

Evaluation for Sample variation 4 (Folds 1-6 and 8-10)

Determine the estimated values $\hat{y}$. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Training is folds 1-6 and 8-10 (training4, created in variation ONE)
# Testing is fold 7 (testing4)

# Train with generateNN() using 3 hidden layers (3, 5 and 2 units respectively)

# layer3 keeps the value c(3, 5, 2) assigned for the first run of this variation
var3_result4 <- generateNN(training4, testing4, layer3)
## [1] "First few rows or error"
##              [,1]
## 3001 -1.585927520
## 3002 10.300656347
## 3003 -1.329685576
## 3004 10.054488306
## 3005  8.371973114
## 3006 21.559799875
## [1] 56520.32444
## [1] 10.6320576
## [1] 0.9993624935

Evaluation for Sample variation 5 (Folds 1-5 and 7-10)

Determine the estimated values $\hat{y}$. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Training set: folds 1-5 and 7-10 (passed below as training5)
# Testing set: fold 6 (passed below as testing5)

# Train and evaluate with generateNN() using 3 hidden layers (3, 5 and 2 units respectively)

# layer3 is reused from earlier in the document (presumably c(3, 5, 2) -- verify);
# the assignment is kept commented out for reference only.
#layer3 <- c(3, 5, 2)
var3_result5 <- generateNN(training5, testing5, layer3)
## [1] "First few rows or error"
##              [,1]
## 2501 16.620117074
## 2502  6.753631179
## 2503  6.669251281
## 2504 10.173860242
## 2505  1.293026721
## 2506 25.075445625
## [1] 54496.06171
## [1] 10.43992928
## [1] 0.9993737613

Evaluation for Sample variation 6 (Folds 1-4 and 6-10)

Determine the estimated $\hat{y}$ values. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Training set: folds 1-4 and 6-10 (passed below as training6)
# Testing set: fold 5 (passed below as testing6)

# Train and evaluate with generateNN() using 3 hidden layers (3, 5 and 2 units respectively)

# layer3 is reused from earlier in the document (presumably c(3, 5, 2) -- verify);
# the assignment is kept commented out for reference only.
#layer3 <- c(3, 5, 2)
var3_result6 <- generateNN(training6, testing6, layer3)
## [1] "First few rows or error"
##              [,1]
## 2001 -2.028943208
## 2002 23.668251371
## 2003 -4.305482203
## 2004  5.006822737
## 2005 13.678758257
## 2006 13.842870932
## [1] 96334.66086
## [1] 13.88053752
## [1] 0.9989580667

Evaluation for Sample variation 7 (Folds 1-3 and 5-10)

Determine the estimated $\hat{y}$ values. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Training set: folds 1-3 and 5-10 (passed below as training7)
# Testing set: fold 4 (passed below as testing7)


# Train and evaluate with generateNN() using 3 hidden layers (3, 5 and 2 units respectively)

# layer3 is reused from earlier in the document (presumably c(3, 5, 2) -- verify);
# the assignment is kept commented out for reference only.
#layer3 <- c(3, 5, 2)
var3_result7 <- generateNN(training7, testing7, layer3)
## [1] "First few rows or error"
##                [,1]
## 1501  -9.7647267767
## 1502   9.7061145045
## 1503   0.3965205117
## 1504 -10.4277269287
## 1505  -4.7736140802
## 1506   8.7069810427
## [1] 181008.8917
## [1] 19.02676492
## [1] 0.9981335028

Evaluation for Sample variation 8 (Folds 1-2 and 4-10)

Determine the estimated $\hat{y}$ values. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Training set: folds 1-2 and 4-10 (passed below as training8)
# Testing set: fold 3 (passed below as testing8)


# Train and evaluate with generateNN() using 3 hidden layers (3, 5 and 2 units respectively)
# layer3 is reused from earlier in the document (presumably c(3, 5, 2) -- verify);
# the assignment is kept commented out for reference only.
#layer3 <- c(3, 5, 2)
var3_result8 <- generateNN(training8, testing8, layer3)
## [1] "First few rows or error"
##               [,1]
## 1001 -12.791527535
## 1002  18.114763382
## 1003  -8.463425198
## 1004  16.658516421
## 1005  31.882953427
## 1006   6.372872624
## [1] 74198.39581
## [1] 12.18182218
## [1] 0.9991701176

Evaluation for Sample variation 9 (Folds 1 and 3-10)

Determine the estimated $\hat{y}$ values. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Training set: folds 1 and 3-10 (passed below as training9)
# Testing set: fold 2 (passed below as testing9)

# Train and evaluate with generateNN() using 3 hidden layers (3, 5 and 2 units respectively)
# layer3 is reused from earlier in the document (presumably c(3, 5, 2) -- verify);
# the assignment is kept commented out for reference only.
#layer3 <- c(3, 5, 2)
var3_result9 <- generateNN(training9, testing9, layer3)
## [1] "First few rows or error"
##              [,1]
## 501   5.472550922
## 502   9.342562994
## 503   7.391604373
## 504   1.786910379
## 505 -15.030792421
## 506  17.523883980
## [1] 72044.14224
## [1] 12.00367796
## [1] 0.9992355601

Evaluation for Sample variation 10 (Folds 2-10)

Determine the estimated $\hat{y}$ values. Calculate the error, i.e., $y - \hat{y}$, and then compute the sum of squared errors (SSE), the root mean squared error (RMSE), and $R^2$.

# Training set: folds 2-10 (passed below as training10)
# Testing set: fold 1 (passed below as testing10)

# Train and evaluate with generateNN() using 3 hidden layers (3, 5 and 2 units respectively)
# layer3 is reused from earlier in the document (presumably c(3, 5, 2) -- verify);
# the assignment is kept commented out for reference only.
#layer3 <- c(3, 5, 2)
var3_result10 <- generateNN(training10, testing10, layer3)
## [1] "First few rows or error"
##            [,1]
## 1 -17.165662466
## 2  10.142898737
## 3 -20.580441544
## 4   6.139163559
## 5 -14.915076756
## 6 -13.544129608
## [1] 57944.9052
## [1] 10.76521298
## [1] 0.9992879493

Visualizing the ANN model - a 5-unit input layer, 3 hidden layers with 3, 5 and 2 units respectively, and 1 output layer

Results from the ANN with a 5-unit input layer, 3 hidden layers with 3, 5 and 2 units respectively, and 1 output layer

# Build the per-fold summary table for the 3-hidden-layer network.
# Row i of the table corresponds to testing fold i.
test_folds <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
three_hidden_layer <- rep(3, times = length(test_folds))

# var3_result10 was tested on fold 1, var3_result9 on fold 2, and so on, so the
# results are stacked from 10 down to 1 to line up with test_folds.
three_h_layer_result <- rbind(
  var3_result10, var3_result9, var3_result8, var3_result7, var3_result6,
  var3_result5, var3_result4, var3_result3, var3_result2, var3_result1
)

# data.frame() names the three unnamed metric columns X1, X2 and X3.
three_h_layer_result_df <- data.frame(three_h_layer_result)

result3 <- data.frame(
  test_folds,
  three_hidden_layer,
  three_h_layer_result_df[["X1"]],
  three_h_layer_result_df[["X2"]],
  three_h_layer_result_df[["X3"]]
)
names(result3) <- c(
  "Testing folds", "No. of Hidden Layers", "SSE", "RMSE", "R2"
)
kable(result3)
Testing folds No. of Hidden Layers SSE RMSE R2
1 3 57944.90520 10.76521298 0.9992879493
2 3 72044.14224 12.00367796 0.9992355601
3 3 74198.39581 12.18182218 0.9991701176
4 3 181008.89173 19.02676492 0.9981335028
5 3 96334.66086 13.88053752 0.9989580667
6 3 54496.06171 10.43992928 0.9993737613
7 3 56520.32444 10.63205760 0.9993624935
8 3 136021.24918 16.49371087 0.9986918651
9 3 114595.73435 15.13907093 0.9988245751
10 3 61677.27374 11.10650924 0.9993140427

From the table above, it can be observed that the neural network trained on folds 1-5 and 7-10 produced the smallest SSE and RMSE when the resulting model was tested on fold 6. We record SSE = 54496.06171 and RMSE = 10.43992928, together with R2 = 0.9993737613, which is the largest and closest to 1. This means a good fit. We are interested in the average of the prediction error to produce a more accurate estimate of model performance, so the average SSE, RMSE and R2 (R-squared) for this experiment are calculated below: SSE = 90484.1639273109, RMSE = 13.1669293482 and R2 = 0.9990351934. This is a good fit.

# Average each metric column (SSE, RMSE, R2) across the ten testing folds;
# the result is a named numeric vector with one entry per metric.
result3_ave <- vapply(result3[, c("SSE", "RMSE", "R2")], mean, numeric(1))
result3_ave
##              SSE             RMSE               R2 
## 90484.1639273109    13.1669293482     0.9990351934

Summary

Evaluation for variations of hidden layer

One hidden layer produced SSE = 89355.7622415918, RMSE = 13.0711912368 and R2 = 0.9990470623. Two hidden layers produced SSE = 89355.7622415918, RMSE = 13.0711912368 and R2 = 0.9990470623. Three hidden layers produced SSE = 90484.1639273109, RMSE = 13.1669293482 and R2 = 0.9990351934.

Choose the best!

Comparing with Multiple Regression Model

Re-run your multiple regression model that you determined in the regression project and obtain all the residuals. Calculate the SSE, the root mean squared error, and R2. Compare the neural net model with the multiple regression model from the point of accuracy as measured by SSE, the root mean squared error, and R2.

The final model chosen from the multiple regression experiment used variables X1 and X2 to predict Y.

# Refit the final multiple regression from the regression project:
# Y explained by X1 and X2 only, using every row of myData.
multivariateModel_final <- lm(formula = Y ~ X1 + X2, data = myData)
# Show the residual quantiles, coefficient table and overall fit statistics.
summary(multivariateModel_final)
## 
## Call:
## lm(formula = Y ~ X1 + X2, data = myData)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -55.381633 -16.188409  -3.380887  17.931397  58.281323 
## 
## Coefficients:
##                  Estimate    Std. Error   t value               Pr(>|t|)
## (Intercept) -62.921568115   0.739461179  -85.0911 < 0.000000000000000222
## X1            6.722103826   0.004619965 1455.0117 < 0.000000000000000222
## X2           62.776560110   0.202926922  309.3555 < 0.000000000000000222
##                
## (Intercept) ***
## X1          ***
## X2          ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.44493 on 4997 degrees of freedom
## Multiple R-squared:  0.9977381,  Adjusted R-squared:  0.9977372 
## F-statistic:  1102122 on 2 and 4997 DF,  p-value: < 0.00000000000000022204
# Print the fitted coefficients of the linear model one at a time.
# Element 1 is the intercept; elements 2 and 3 are the slopes for X1 and X2,
# in the order the terms appear in the formula Y ~ X1 + X2.

print(coef(multivariateModel_final)[1])
##  (Intercept) 
## -62.92156811
print(coef(multivariateModel_final)[2])
##          X1 
## 6.722103826
print(coef(multivariateModel_final)[3])
##          X2 
## 62.77656011
# In-sample predictions of the linear model: one fitted value per row of myData.
predicted_Y <- multivariateModel_final[["fitted.values"]]

# Residuals: observed Y minus fitted Y
error22 <- myData[["Y"]] - predicted_Y

print("First few rows or error")
## [1] "First few rows or error"
print(head(error22))
##            1            2            3            4            5 
## -19.80935649  27.74650868 -29.26925985  27.42739776 -33.78917552 
##            6 
## -24.44842063
# Sum of squared errors over all rows

sse22 <- sum(error22^2)

# Root mean squared error

rmse22 <- sqrt(mean(error22^2))

# R squared: 1 - SSE / total sum of squares of Y about its mean

rsquared22 <- 1 - (sse22 / sum((myData[["Y"]] - mean(myData[["Y"]]))^2))

# Same (SSE, RMSE, R2) layout as the neural-network results
result22 <- c(sse22, rmse22, rsquared22)

print(sse22)
## [1] 2088721.185
print(rmse22)
## [1] 20.43879246
print(rsquared22)
## [1] 0.9977381363
# Refit a network with one hidden layer of 3 units on training1 and compare its
# predictions on testing1 with the linear model's in-sample fit, side by side.
headings <- names(training1)
predictors <- headings[!headings %in% "Y"]
form <- as.formula(paste("Y ~", paste(predictors, collapse = " + ")))
# TRUE rather than T: T is an ordinary variable that can be reassigned.
nn_model <- neuralnet(form, data = training1, hidden = 3, linear.output = TRUE)

# Select the covariates by name, in the same order as the formula, instead of
# relying on them occupying columns 1:5 of testing1.
pred <- compute(nn_model, testing1[, predictors])

# Map Y back to its original units.
# NOTE(review): assumes training1/testing1 were min-max scaled using the range
# of myData$Y -- confirm against the chunk that builds the folds.
y_min <- min(myData$Y)
y_span <- max(myData$Y) - y_min

testing_unscaled <- (testing1$Y) * y_span + y_min

pred_unscaled <- pred$net.result * y_span + y_min

# Two panels side by side; remember the previous layout so it can be restored.
old_par <- par(mfrow = c(1, 2))

plot(testing_unscaled, pred_unscaled, col = 'red', main = 'Real vs predicted NN', pch = 18, cex = 0.7)
abline(0, 1, lwd = 2)
legend('bottomright', legend = 'NN', pch = 18, col = 'red', bty = 'n')

plot(myData$Y, predicted_Y, col = 'blue', main = 'Real vs predicted Multiple LM', pch = 18, cex = 0.7)
abline(0, 1, lwd = 2)
legend('bottomright', legend = 'LM', pch = 18, col = 'blue', bty = 'n', cex = .95)

# Put the graphics layout back so later plots are not forced into two columns.
par(old_par)

By visually inspecting the plot we can see that the predictions made by the neural network are (in general) more concentrated around the line than those made by the linear model.

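The final result shows that the ANN performs far better than the multiple regression model on this dataset, with lower SSE and RMSE and a higher R-squared value. Note that each ANN SSE is computed on a 500-row test fold whereas the regression SSE covers all 5000 rows, so RMSE and R-squared are the like-for-like measures.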