The Concrete data set is being used for Artificial neural networs modelling by using the ingredients used for the concrete and predict the strength of concrete.
library(neuralnet) # regression
## Warning: package 'neuralnet' was built under R version 3.5.1
library(nnet) # classification
## Warning: package 'nnet' was built under R version 3.5.1
library(NeuralNetTools)
## Warning: package 'NeuralNetTools' was built under R version 3.5.1
# Read the data
concrete <- read.csv(file.choose())
str(concrete)
## 'data.frame': 1030 obs. of 9 variables:
## $ cement : num 540 540 332 332 199 ...
## $ slag : num 0 0 142 142 132 ...
## $ ash : num 0 0 0 0 0 0 0 0 0 0 ...
## $ water : num 162 162 228 228 192 228 228 228 228 228 ...
## $ superplastic: num 2.5 2.5 0 0 0 0 0 0 0 0 ...
## $ coarseagg : num 1040 1055 932 932 978 ...
## $ fineagg : num 676 676 594 594 826 ...
## $ age : int 28 28 270 365 360 90 365 28 28 28 ...
## $ strength : num 80 61.9 40.3 41 44.3 ...
# Exploratory data Analysis :
hist(concrete$cement, prob = T, breaks = 30)
lines(density(concrete$cement))
summary(concrete$cement)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 102.0 192.4 272.9 281.2 350.0 540.0
hist(concrete$slag, prob = T, breaks = 30)
lines(density(concrete$slag))
summary(concrete$slag)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 0.0 22.0 73.9 142.9 359.4
# Similar Histogram for eight other features confirms that the
# data has different scales and needs a normalization.
summary(concrete) # Confirms on the different scale and demands normalizing the data.
## cement slag ash water
## Min. :102.0 Min. : 0.0 Min. : 0.00 Min. :121.8
## 1st Qu.:192.4 1st Qu.: 0.0 1st Qu.: 0.00 1st Qu.:164.9
## Median :272.9 Median : 22.0 Median : 0.00 Median :185.0
## Mean :281.2 Mean : 73.9 Mean : 54.19 Mean :181.6
## 3rd Qu.:350.0 3rd Qu.:142.9 3rd Qu.:118.30 3rd Qu.:192.0
## Max. :540.0 Max. :359.4 Max. :200.10 Max. :247.0
## superplastic coarseagg fineagg age
## Min. : 0.000 Min. : 801.0 Min. :594.0 Min. : 1.00
## 1st Qu.: 0.000 1st Qu.: 932.0 1st Qu.:731.0 1st Qu.: 7.00
## Median : 6.400 Median : 968.0 Median :779.5 Median : 28.00
## Mean : 6.205 Mean : 972.9 Mean :773.6 Mean : 45.66
## 3rd Qu.:10.200 3rd Qu.:1029.4 3rd Qu.:824.0 3rd Qu.: 56.00
## Max. :32.200 Max. :1145.0 Max. :992.6 Max. :365.00
## strength
## Min. : 2.33
## 1st Qu.:23.71
## Median :34.45
## Mean :35.82
## 3rd Qu.:46.13
## Max. :82.60
# Apply Normalization technique to the whole dataset :
normalize<-function(x){
return ( (x-min(x))/(max(x)-min(x)))
}
concrete_norm<-as.data.frame(lapply(concrete,FUN=normalize))
summary(concrete_norm$strength) # Normalized form of strength
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.2664 0.4001 0.4172 0.5457 1.0000
summary(concrete$strength) # Orginal strength value
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.33 23.71 34.45 35.82 46.13 82.60
# Data Partition
set.seed(123)
ind <- sample(2, nrow(concrete_norm), replace = TRUE, prob = c(0.7,0.3))
concrete_train <- concrete_norm[ind==1,]
concrete_test <- concrete_norm[ind==2,]
# Creating a neural network model on training data
concrete_model <- neuralnet(strength~cement+slag+ash+water+superplastic+coarseagg+fineagg+age,data = concrete_train)
str(concrete_model)
## List of 13
## $ call : language neuralnet(formula = strength ~ cement + slag + ash + water + superplastic + coarseagg + fineagg + age, data | __truncated__
## $ response : num [1:729, 1] 0.967 0.473 0.557 0.515 0.542 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:729] "1" "3" "6" "7" ...
## .. ..$ : chr "strength"
## $ covariate : num [1:729, 1:8] 1 0.526 0.374 0.635 0.374 ...
## $ model.list :List of 2
## ..$ response : chr "strength"
## ..$ variables: chr [1:8] "cement" "slag" "ash" "water" ...
## $ err.fct :function (x, y)
## ..- attr(*, "type")= chr "sse"
## $ act.fct :function (x)
## ..- attr(*, "type")= chr "logistic"
## $ linear.output : logi TRUE
## $ data :'data.frame': 729 obs. of 9 variables:
## ..$ cement : num [1:729] 1 0.526 0.374 0.635 0.374 ...
## ..$ slag : num [1:729] 0 0.396 0.317 0.264 0.317 ...
## ..$ ash : num [1:729] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ water : num [1:729] 0.321 0.848 0.848 0.848 0.848 ...
## ..$ superplastic: num [1:729] 0.0776 0 0 0 0 ...
## ..$ coarseagg : num [1:729] 0.695 0.381 0.381 0.381 0.381 ...
## ..$ fineagg : num [1:729] 0.206 0 0.191 0 0.191 ...
## ..$ age : num [1:729] 0.0742 0.739 0.2445 1 0.0742 ...
## ..$ strength : num [1:729] 0.967 0.473 0.557 0.515 0.542 ...
## $ net.result :List of 1
## ..$ : num [1:729, 1] 0.616 0.692 0.524 0.692 0.244 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:729] "1" "3" "6" "7" ...
## .. .. ..$ : NULL
## $ weights :List of 1
## ..$ :List of 2
## .. ..$ : num [1:9, 1] 2.092 -3.815 -2.306 -0.771 1.387 ...
## .. ..$ : num [1:2, 1] 0.693 -0.773
## $ startweights :List of 1
## ..$ :List of 2
## .. ..$ : num [1:9, 1] -0.735 -0.132 0.31 -1.04 -0.184 ...
## .. ..$ : num [1:2, 1] 1.11 0.55
## $ generalized.weights:List of 1
## ..$ : num [1:729, 1:8] 1.11602 0.0175 2.01297 0.00135 3.89616 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:729] "1" "3" "6" "7" ...
## .. .. ..$ : NULL
## $ result.matrix : num [1:14, 1] 4.8342 0.0096 2251 2.0915 -3.8154 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:14] "error" "reached.threshold" "steps" "Intercept.to.1layhid1" ...
## .. ..$ : chr "1"
## - attr(*, "class")= chr "nn"
plot(concrete_model, rep = "best")
summary(concrete_model)
## Length Class Mode
## call 3 -none- call
## response 729 -none- numeric
## covariate 5832 -none- numeric
## model.list 2 -none- list
## err.fct 1 -none- function
## act.fct 1 -none- function
## linear.output 1 -none- logical
## data 9 data.frame list
## net.result 1 -none- list
## weights 1 -none- list
## startweights 1 -none- list
## generalized.weights 1 -none- list
## result.matrix 14 -none- numeric
par(mar = numeric(4), family = 'serif')
plotnet(concrete_model, alpha = 0.6)
# Evaluating model performance
set.seed(12323)
model_results <- compute(concrete_model,concrete_test[1:8])
predicted_strength <- model_results$net.result
# Predicted strength Vs Actual Strength of test data.
cor(predicted_strength,concrete_test$strength)
## [,1]
## [1,] 0.8194732924
# since the prediction is in Normalized form, we need to de-normalize it
# to get the actual prediction on strength.
str_max <- max(concrete$strength)
str_min <- min(concrete$strength)
unnormalize <- function(x, min, max) {
return( (max - min)*x + min )
}
ActualStrength_pred <- unnormalize(predicted_strength,str_min,str_max)
head(ActualStrength_pred)
## [,1]
## 2 51.79066063
## 4 57.91625457
## 5 57.90720105
## 8 34.75401033
## 11 44.53277140
## 16 46.98616352
# Improve the model performance :
set.seed(12345)
concrete_model2 <- neuralnet(strength~cement+slag+ash+water+superplastic+
coarseagg+fineagg+age,data= concrete_train,
hidden = 5)
plot(concrete_model2, rep = "best")
summary(concrete_model2)
## Length Class Mode
## call 4 -none- call
## response 729 -none- numeric
## covariate 5832 -none- numeric
## model.list 2 -none- list
## err.fct 1 -none- function
## act.fct 1 -none- function
## linear.output 1 -none- logical
## data 9 data.frame list
## net.result 1 -none- list
## weights 1 -none- list
## startweights 1 -none- list
## generalized.weights 1 -none- list
## result.matrix 54 -none- numeric
model_results2<-compute(concrete_model2,concrete_test[1:8])
predicted_strength2<-model_results2$net.result
cor(predicted_strength2,concrete_test$strength)
## [,1]
## [1,] 0.9289368661
plot(predicted_strength,concrete_test$strength)
par(mar = numeric(4), family = 'serif')
plotnet(concrete_model2, alpha = 0.6)
# SSE(Error) has reduced and training steps had been increased as the number of neurons # under hidden layer are increased