This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Ctrl+Shift+Enter*.

## Modeling the Strength of Concrete  ----
## Step 1: Collecting data
## Dataset: Compressive strength of concrete --- obtained from the UCI Machine Learning Data Repository (http://archive.ics.uci.edu/ml)
## Step 2: Exploring and preparing the data ----
# read in data and examine structure
concrete <- read.csv("http://www.sci.csueastbay.edu/~esuess/classes/Statistics_6620/Presentations/ml11/concrete.csv")
str(concrete)
'data.frame':   1030 obs. of  9 variables:
 $ cement      : num  141 169 250 266 155 ...
 $ slag        : num  212 42.2 0 114 183.4 ...
 $ ash         : num  0 124.3 95.7 0 0 ...
 $ water       : num  204 158 187 228 193 ...
 $ superplastic: num  0 10.8 5.5 0 9.1 0 0 6.4 0 9 ...
 $ coarseagg   : num  972 1081 957 932 1047 ...
 $ fineagg     : num  748 796 861 670 697 ...
 $ age         : int  28 14 28 28 28 90 7 56 28 28 ...
 $ strength    : num  29.9 23.5 29.2 45.9 18.3 ...
# custom normalization function
normalize <- function(x) { 
  return((x - min(x)) / (max(x) - min(x)))
}
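# quick sanity check of normalize() on a toy vector (illustrative only);
# the values should run from 0 to 1 in equal steps: 0.00 0.25 0.50 0.75 1.00
normalize(c(1, 2, 3, 4, 5))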
# apply normalization to entire data frame
concrete_norm <- as.data.frame(lapply(concrete, normalize))
# confirm that the range is now between zero and one
summary(concrete_norm$strength)
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
0.0000000 0.2663511 0.4000872 0.4171915 0.5457207 1.0000000 
# compared to the original minimum and maximum
summary(concrete$strength)
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
 2.33000 23.71000 34.44500 35.81796 46.13500 82.60000 
# create training and test data
concrete_train <- concrete_norm[1:773, ]
concrete_test <- concrete_norm[774:1030, ]
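# note: the sequential 75/25 split above assumes the rows are not sorted in
# any systematic way; otherwise a random split could be used instead.
# a minimal sketch (the *_rnd objects are not used in the rest of this notebook):
set.seed(12345)
train_idx <- sample(nrow(concrete_norm), floor(0.75 * nrow(concrete_norm)))
concrete_train_rnd <- concrete_norm[train_idx, ]
concrete_test_rnd  <- concrete_norm[-train_idx, ]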
## Step 3: Training a model on the data ----
# train the neuralnet model
library(neuralnet)
# simple ANN with only a single hidden neuron
set.seed(12345) # to guarantee repeatable results
concrete_model <- neuralnet(formula = strength ~ cement + slag +
                              ash + water + superplastic + 
                              coarseagg + fineagg + age,
                            data = concrete_train)
# visualize the network topology
plot(concrete_model)
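# the plot reports the SSE and the number of training steps; assuming the
# default result.matrix layout of the neuralnet package, the same values can
# be read programmatically:
concrete_model$result.matrix[c("error", "steps"), ]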
# Reference: http://www.r-bloggers.com/neuralnettools-1-0-0-now-on-cran/
# alternative plot
library(NeuralNetTools)
# plotnet
par(mar = numeric(4), family = 'serif')

plotnet(concrete_model, alpha = 0.6)
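# NeuralNetTools can also summarize relative variable importance; a minimal
# sketch, assuming garson() supports neuralnet objects with a single hidden
# layer (as this model has):
garson(concrete_model)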
## Step 4: Evaluating model performance ----
# obtain model results
model_results <- compute(concrete_model, concrete_test[1:8])
# obtain predicted strength values
predicted_strength <- model_results$net.result
# examine the correlation between predicted and actual values
cor(predicted_strength, concrete_test$strength)   # higher than the 0.7170368646 reported in the book
             [,1]
[1,] 0.8064655576
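# correlation is insensitive to scale, so it can be computed on the normalized
# values; as an additional check (an addition, not part of the original
# analysis), the mean absolute error on the normalized scale can be computed:
mean(abs(predicted_strength - concrete_test$strength))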
# produce actual (original-scale) predictions by reversing the min-max normalization
head(predicted_strength)
            [,1]
774 0.3258991537
775 0.4677425372
776 0.2370268181
777 0.6718811029
778 0.4663428766
779 0.4685272270
concrete_train_original_strength <- concrete[1:773,"strength"]
strength_min <- min(concrete_train_original_strength)
strength_max <- max(concrete_train_original_strength)
head(concrete_train_original_strength)
[1] 29.89 23.51 29.22 45.85 18.29 21.86
# custom function to reverse the min-max normalization
unnormalize <- function(x, min, max) { 
  return( (max - min)*x + min )
}
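# sanity check (illustrative): unnormalize() inverts normalize() when given
# the same min and max that normalize() used
x_demo <- c(10, 25, 40, 80)
all.equal(unnormalize(normalize(x_demo), min(x_demo), max(x_demo)), x_demo)
# note: concrete_norm was normalized with the full dataset's min and max, so
# rescaling with the training rows' strength range assumes the training set
# spans essentially the same strength range as the full data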
strength_pred <- unnormalize(predicted_strength, strength_min, strength_max)
head(strength_pred)
             [,1]
774  28.212910787
775  39.478112301
776  21.154669896
777  55.690797192
778  39.366951260
779  39.540432369
## Step 5: Improving model performance ----
# a more complex neural network topology with 5 hidden neurons
set.seed(12345) # to guarantee repeatable results
concrete_model2 <- neuralnet(strength ~ cement + slag +
                               ash + water + superplastic + 
                               coarseagg + fineagg + age,
                             data = concrete_train, hidden = 5, act.fct = "logistic")
# plot the network
plot(concrete_model2)
# plotnet
par(mar = numeric(4), family = 'serif')

plotnet(concrete_model2, alpha = 0.6)
# evaluate the results as we did before
model_results2 <- compute(concrete_model2, concrete_test[1:8])
predicted_strength2 <- model_results2$net.result
cor(predicted_strength2, concrete_test$strength)  # higher than the 0.801444583 reported in the book
             [,1]
[1,] 0.9244533426
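# a possible further experiment (an addition, not part of the original
# notebook): hidden also accepts a vector to build multiple hidden layers,
# e.g. two layers of 5 and 3 neurons; training can be slow and may need a
# larger stepmax to converge
set.seed(12345)
concrete_model3 <- neuralnet(strength ~ cement + slag +
                               ash + water + superplastic +
                               coarseagg + fineagg + age,
                             data = concrete_train, hidden = c(5, 3),
                             stepmax = 1e6)
model_results3 <- compute(concrete_model3, concrete_test[1:8])
cor(model_results3$net.result, concrete_test$strength)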
# try different activation function
# a more complex neural network topology with 5 hidden neurons
set.seed(12345) # to guarantee repeatable results
concrete_model2 <- neuralnet(strength ~ cement + slag +
                               ash + water + superplastic + 
                               coarseagg + fineagg + age,
                             data = concrete_train, hidden = 5, act.fct = "tanh")
# evaluate the results as we did before
model_results2 <- compute(concrete_model2, concrete_test[1:8])
predicted_strength2 <- model_results2$net.result
cor(predicted_strength2, concrete_test$strength)  
             [,1]
[1,] 0.5741729322
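# with this seed, the tanh activation gives a noticeably lower correlation than
# the logistic activation; results for these small networks can vary a good
# deal with the random weight initialization
# one further idea (an assumption, not part of the original notebook):
# neuralnet also accepts a custom differentiable activation function, for
# example a smooth approximation of ReLU (softplus)
softplus <- function(x) { log(1 + exp(x)) }
set.seed(12345)
concrete_model4 <- neuralnet(strength ~ cement + slag +
                               ash + water + superplastic +
                               coarseagg + fineagg + age,
                             data = concrete_train, hidden = 5,
                             act.fct = softplus)
model_results4 <- compute(concrete_model4, concrete_test[1:8])
cor(model_results4$net.result, concrete_test$strength)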