Read in data and examine structure
concrete <- read.csv("http://www.sci.csueastbay.edu/~esuess/classes/Statistics_6620/Presentations/ml11/concrete.csv")
str(concrete)
## 'data.frame': 1030 obs. of 9 variables:
## $ cement : num 141 169 250 266 155 ...
## $ slag : num 212 42.2 0 114 183.4 ...
## $ ash : num 0 124.3 95.7 0 0 ...
## $ water : num 204 158 187 228 193 ...
## $ superplastic: num 0 10.8 5.5 0 9.1 0 0 6.4 0 9 ...
## $ coarseagg : num 972 1081 957 932 1047 ...
## $ fineagg : num 748 796 861 670 697 ...
## $ age : int 28 14 28 28 28 90 7 56 28 28 ...
## $ strength : num 29.9 23.5 29.2 45.9 18.3 ...
# custom normalization function
normalize <- function(x) {
return((x - min(x)) / (max(x) - min(x)))
}
# apply normalization to entire data frame
concrete_norm <- as.data.frame(lapply(concrete, normalize))
# confirm that the range is now between zero and one
summary(concrete_norm$strength)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.2664 0.4001 0.4172 0.5457 1.0000
# create training and test data
concrete_train <- concrete_norm[1:773, ]; dim(concrete_train)
## [1] 773 9
concrete_test <- concrete_norm[774:1030, ]; dim(concrete_test)
## [1] 257 9
# train the neuralnet model
library(neuralnet)
# simple ANN with only a single hidden neuron
set.seed(12345) # to guarantee repeatable results
concrete_model <- neuralnet(formula = strength ~ cement + slag +ash + water + superplastic +
coarseagg + fineagg + age,data = concrete_train)
# visualize the network topology
plot(concrete_model)
# alternative plot
library(NeuralNetTools)
# plotnet
par(mar = numeric(4), family = 'serif')
plotnet(concrete_model, alpha = 0.6)
# obtain model results
model_results <- compute(concrete_model, concrete_test[,1:8])
# obtain predicted strength values
predicted_strength <- model_results$net.result
# examine the correlation between predicted and actual values
cor(predicted_strength, concrete_test$strength) # higher than stated in book 0.7170368646
## [,1]
## [1,] 0.8064655576
# produce actual predictions by
head(predicted_strength)
## [,1]
## 774 0.3258991537
## 775 0.4677425372
## 776 0.2370268181
## 777 0.6718811029
## 778 0.4663428766
## 779 0.4685272270
concrete_train_original_strength <- concrete[1:773,"strength"]
strength_min <- min(concrete_train_original_strength)
strength_max <- max(concrete_train_original_strength)
head(concrete_train_original_strength)
## [1] 29.89 23.51 29.22 45.85 18.29 21.86
# custom normalization function
unnormalize <- function(x, min, max) {
return( (max - min)*x + min )
}
strength_pred <- unnormalize(predicted_strength, strength_min, strength_max)
head(strength_pred)
## [,1]
## 774 28.21291079
## 775 39.47811230
## 776 21.15466990
## 777 55.69079719
## 778 39.36695126
## 779 39.54043237
# a more complex neural network topology with 5 hidden neurons
set.seed(12345) # to guarantee repeatable results
concrete_model2 <- neuralnet(strength ~ cement + slag +
ash + water + superplastic +
coarseagg + fineagg + age,
data = concrete_train, hidden = 5, act.fct = "logistic")
# plot the network
plot(concrete_model2)
# plotnet
par(mar = numeric(4), family = 'serif')
plotnet(concrete_model2, alpha = 0.6)
# evaluate the results as we did before
model_results2 <- compute(concrete_model2, concrete_test[1:8])
predicted_strength2 <- model_results2$net.result
cor(predicted_strength2, concrete_test$strength) # higher than stated in book 0.801444583
## [,1]
## [1,] 0.9244533426