Importing the binary data set
library(readr)
data <- read_csv("~/Neural Network in R/binary.csv")
## Parsed with column specification:
## cols(
## admit = col_double(),
## gre = col_double(),
## gpa = col_double(),
## rank = col_double()
## )
names(data)
## [1] "admit" "gre" "gpa" "rank"
str(data)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 400 obs. of 4 variables:
## $ admit: num 0 1 1 1 0 1 1 0 1 0 ...
## $ gre : num 380 660 800 640 520 760 560 400 540 700 ...
## $ gpa : num 3.61 3.67 4 3.19 2.93 3 2.98 3.08 3.39 3.92 ...
## $ rank : num 3 3 1 4 4 2 1 2 3 2 ...
## - attr(*, "spec")=
## .. cols(
## .. admit = col_double(),
## .. gre = col_double(),
## .. gpa = col_double(),
## .. rank = col_double()
## .. )
Min-Max Normalization
data$gre <- (data$gre - min(data$gre))/(max(data$gre) - min(data$gre))
data$gpa <- (data$gpa - min(data$gpa))/(max(data$gpa) - min(data$gpa))
data$rank <- (data$rank - min(data$rank))/(max(data$rank)-min(data$rank))
# viewing how the normalization works plot the histograms of the variables
hist(data$gre)

hist(data$gpa)

hist(data$rank)

Data Partition
set.seed(222)
ind <- sample(2, nrow(data), replace = TRUE, prob = c(0.7, 0.3))
training <- data[ind==1,]
testing <- data[ind==2,]
Neural Networks
using the neuralnet
library(neuralnet)
set.seed(333)
n <- neuralnet(admit~gre+gpa+rank,
data = training,
hidden = 1,#adjust the hidden layers
err.fct = "ce",
linear.output = FALSE)
summary(n)
## Length Class Mode
## call 6 -none- call
## response 281 -none- numeric
## covariate 843 -none- numeric
## model.list 2 -none- list
## err.fct 1 -none- function
## act.fct 1 -none- function
## linear.output 1 -none- logical
## data 4 data.frame list
## exclude 0 -none- NULL
## net.result 1 -none- list
## weights 1 -none- list
## generalized.weights 1 -none- list
## startweights 1 -none- list
## result.matrix 9 -none- numeric
Ploting the neural network diagram
a<-plot(n)
a
## NULL
Prediction
output <- compute(n, training[,-1]) # eliminating the first column
#output
head(output$net.result)
## [,1]
## [1,] 0.3779477
## [2,] 0.6622788
## [3,] 0.1487507
## [4,] 0.3433429
## [5,] 0.1924163
## [6,] 0.2396130
head(training[1,])
## # A tibble: 1 x 4
## admit gre gpa rank
## <dbl> <dbl> <dbl> <dbl>
## 1 1 0.759 0.810 0.667
Node Output Calculations with Sigmoid Activation Function
in4 <- 0.0455 + (0.82344*0.7586206897) + (1.35186*0.8103448276) + (-0.87435*0.6666666667)
out4 <- 1/(1+exp(-in4))
in5 <- -7.06125 +(8.5741*out4)
out5 <- 1/(1+exp(-in5))
Confusion Matrix & Misclassification Error - training data
output <- compute(n, training[,-1])
p1 <- output$net.result
pred1 <- ifelse(p1>0.5, 1, 0)
tab1 <- table(pred1, training$admit)
tab1
##
## pred1 0 1
## 0 172 66
## 1 17 26
1-sum(diag(tab1))/sum(tab1)
## [1] 0.2953737
Confusion Matrix & Misclassification Error - testing data
output <- compute(n, testing[,-1])
p2 <- output$net.result
pred2 <- ifelse(p2>0.5, 1, 0)
tab2 <- table(pred2, testing$admit)
tab2
##
## pred2 0 1
## 0 76 27
## 1 8 8
sum(diag(tab2))/sum(tab2) ## accuracy
## [1] 0.7058824
1-sum(diag(tab2))/sum(tab2) ## misclassification
## [1] 0.2941176
Testing the data set
Confusion Matrix & Misclassification Error - testing data
output <- compute(n, testing[,-1])
p2 <- output$net.result
pred2 <- ifelse(p2>0.5, 1, 0)
tab2 <- table(pred2, testing$admit)
tab2
##
## pred2 0 1
## 0 76 27
## 1 8 8
sum(diag(tab1))/sum(tab1)
## [1] 0.7046263
1-sum(diag(tab1))/sum(tab1)
## [1] 0.2953737
# Confusion Matrix & Misclassification Error - testing data
output <- compute(n, testing[,-1])
p2 <- output$net.result
pred2 <- ifelse(p2>0.5, 1, 0)
tab2 <- table(pred2, testing$admit)
tab2
##
## pred2 0 1
## 0 76 27
## 1 8 8
1-sum(diag(tab2))/sum(tab2)
## [1] 0.2941176