This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
## Modeling the Strength of Concrete ----
## Step 1: Collecting data
## Dataset: Copressive strength of concrete--- obtained from the UCI Machine Learning Data Repository (http://archive.ics.uci.edu/ml)
## Step 2: Exploring and preparing the data ----
# read in data and examine structure
concrete <- read.csv("http://www.sci.csueastbay.edu/~esuess/classes/Statistics_6620/Presentations/ml11/concrete.csv")
str(concrete)
'data.frame': 1030 obs. of 9 variables:
$ cement : num 141 169 250 266 155 ...
$ slag : num 212 42.2 0 114 183.4 ...
$ ash : num 0 124.3 95.7 0 0 ...
$ water : num 204 158 187 228 193 ...
$ superplastic: num 0 10.8 5.5 0 9.1 0 0 6.4 0 9 ...
$ coarseagg : num 972 1081 957 932 1047 ...
$ fineagg : num 748 796 861 670 697 ...
$ age : int 28 14 28 28 28 90 7 56 28 28 ...
$ strength : num 29.9 23.5 29.2 45.9 18.3 ...
# custom normalization function
normalize <- function(x) {
return((x - min(x)) / (max(x) - min(x)))
}
# apply normalization to entire data frame
concrete_norm <- as.data.frame(lapply(concrete, normalize))
# confirm that the range is now between zero and one
summary(concrete_norm$strength)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.0000000 0.2663511 0.4000872 0.4171915 0.5457207 1.0000000
# compared to the original minimum and maximum
summary(concrete$strength)
Min. 1st Qu. Median Mean 3rd Qu. Max.
2.33000 23.71000 34.44500 35.81796 46.13500 82.60000
# create training and test data
concrete_train <- concrete_norm[1:773, ]
concrete_test <- concrete_norm[774:1030, ]
## Step 3: Training a model on the data ----
# train the neuralnet model
library(neuralnet)
# simple ANN with only a single hidden neuron
set.seed(12345) # to guarantee repeatable results
concrete_model <- neuralnet(formula = strength ~ cement + slag +
ash + water + superplastic +
coarseagg + fineagg + age,
data = concrete_train)
# visualize the network topology
plot(concrete_model)
# Reference: http://www.r-bloggers.com/neuralnettools-1-0-0-now-on-cran/
# alternative plot
library(NeuralNetTools)
# plotnet
par(mar = numeric(4), family = 'serif')
plotnet(concrete_model, alpha = 0.6)
## Step 4: Evaluating model performance ----
# obtain model results
model_results <- compute(concrete_model, concrete_test[1:8])
# obtain predicted strength values
predicted_strength <- model_results$net.result
# examine the correlation between predicted and actual values
cor(predicted_strength, concrete_test$strength) # higher than stated in book 0.7170368646
[,1]
[1,] 0.8064655576
# produce actual predictions by
head(predicted_strength)
[,1]
774 0.3258991537
775 0.4677425372
776 0.2370268181
777 0.6718811029
778 0.4663428766
779 0.4685272270
concrete_train_original_strength <- concrete[1:773,"strength"]
strength_min <- min(concrete_train_original_strength)
strength_max <- max(concrete_train_original_strength)
head(concrete_train_original_strength)
[1] 29.89 23.51 29.22 45.85 18.29 21.86
# custom normalization function
unnormalize <- function(x, min, max) {
return( (max - min)*x + min )
}
strength_pred <- unnormalize(predicted_strength, strength_min, strength_max)
strength_pred
[,1]
774 28.212910787
775 39.478112301
776 21.154669896
777 55.690797192
778 39.366951260
779 39.540432369
780 39.928033889
781 49.040354523
782 27.907103923
783 20.321053693
784 7.266385231
785 46.876247191
786 48.856855398
787 46.703583933
788 45.448376324
789 51.065068074
790 43.680671652
791 28.193980938
792 28.827103782
793 18.345241127
794 18.252462770
795 15.643701142
796 17.801483462
797 23.682825829
798 13.976073367
799 12.194355970
800 33.449926982
801 25.088099888
802 52.607784390
803 24.433849292
804 34.735202333
805 44.689978758
806 43.020408989
807 33.080435982
808 34.849702382
809 47.563249060
810 6.938091659
811 29.327314842
812 52.193794638
813 18.724170879
814 23.403183011
815 31.805906354
816 26.771592010
817 54.848833698
818 26.325671398
819 18.271643621
820 21.589603938
821 19.723934696
822 24.236493867
823 26.151069034
824 44.930065559
825 45.984424330
826 36.569851275
827 18.383419764
828 37.547970377
829 44.080234009
830 17.365663875
831 49.764429240
832 23.644245310
833 18.886834859
834 46.263011829
835 54.475179301
836 20.727657800
837 50.093261598
838 35.938877950
839 55.151231281
840 43.539564013
841 20.696074238
842 44.549415622
843 24.482322674
844 16.201399197
845 42.375778976
846 45.270976016
847 44.638527605
848 10.311700822
849 46.304752045
850 19.307530759
851 55.737413630
852 24.793017815
853 23.814544656
854 33.546539494
855 42.695335181
856 49.811424381
857 48.652348988
858 52.638858641
859 53.166049496
860 24.183085073
861 41.825425595
862 54.927075804
863 51.251536938
864 53.452210772
865 49.787431937
866 29.885878058
867 34.712601224
868 44.396238610
869 15.761370235
870 51.046835680
871 9.046006076
872 18.467300274
873 47.763396508
874 16.520661292
875 46.236355997
876 44.008915499
877 15.788252948
878 35.338644057
879 55.721030895
880 26.239781748
881 54.219040311
882 31.887794120
883 28.775965442
884 21.828939255
885 49.764429240
886 46.302614755
887 44.990113139
888 43.872794257
889 33.613802430
890 49.821748527
891 22.489345534
892 54.731284069
893 53.523149545
894 48.268894377
895 39.425593582
896 25.287008255
897 34.585350422
898 43.126060379
899 46.312025009
900 43.287860282
901 54.227588359
902 55.759561064
903 55.126429340
904 41.980980168
905 23.770483604
906 31.495206543
907 30.872026632
908 45.319018412
909 48.325099603
910 46.844044826
911 16.395384725
912 49.033749656
913 35.609382272
914 53.447559068
915 28.490260269
916 26.097392370
917 34.553462242
918 18.034082548
919 46.017644897
920 55.756594314
921 19.113094271
922 45.880359504
923 33.667746328
924 15.840638297
925 15.250768816
926 25.967077829
927 49.040327527
928 20.250009925
929 53.463888607
930 39.712945454
931 18.635100239
932 43.052489413
933 36.781496963
934 47.367089782
935 36.459125460
936 50.202821978
937 49.768635382
938 47.960542410
939 23.039593502
940 44.787449396
941 22.994965622
942 52.115231571
943 30.231029721
944 46.490832321
945 13.518827249
946 47.463311002
947 33.652021124
948 36.959189963
949 42.675567254
950 43.839085507
951 55.441831705
952 55.730302275
953 42.495361138
954 45.995593485
955 14.559033517
956 53.889321097
957 47.638332031
958 55.756976692
959 14.065750036
960 43.086479175
961 27.286788837
962 21.414631033
963 14.686461194
964 49.955191984
965 41.489814802
966 50.757732386
967 36.406258094
968 50.903995407
969 18.617655592
970 42.779147183
971 27.548360275
972 55.198346170
973 21.614221641
974 43.774007128
975 33.582782000
976 34.494050231
977 31.427147694
978 30.748169174
979 41.713472991
980 52.745480397
981 48.698035754
982 48.463177370
983 39.479649564
984 43.545565848
985 51.163772080
986 55.097430829
987 16.960833436
988 21.228459298
989 16.372967444
990 45.699914067
991 40.015149409
992 54.915041504
993 52.363061591
994 46.703583933
995 22.971496748
996 55.757710774
997 23.665277416
998 30.203783047
999 38.388908212
1000 31.819252685
1001 44.736636088
1002 20.515435999
1003 43.646913655
1004 53.414481990
1005 24.202915410
1006 49.321113911
1007 15.705157900
1008 28.509942443
1009 51.916787243
1010 27.794472365
1011 15.938227741
1012 31.463715541
1013 44.909197894
1014 48.094743404
1015 29.573686435
1016 32.339635939
1017 21.638918442
1018 54.759105089
1019 20.858741569
1020 30.157137857
1021 10.357941496
1022 25.792662773
1023 17.857069663
1024 12.193204624
1025 27.194717549
1026 17.787728300
1027 52.970583241
1028 41.241771843
1029 55.704290534
1030 46.250018028
## Step 5: Improving model performance ----
# a more complex neural network topology with 5 hidden neurons
set.seed(12345) # to guarantee repeatable results
concrete_model2 <- neuralnet(strength ~ cement + slag +
ash + water + superplastic +
coarseagg + fineagg + age,
data = concrete_train, hidden = 5, act.fct = "logistic")
# plot the network
plot(concrete_model2)
# plotnet
par(mar = numeric(4), family = 'serif')
plotnet(concrete_model2, alpha = 0.6)
# evaluate the results as we did before
model_results2 <- compute(concrete_model2, concrete_test[1:8])
predicted_strength2 <- model_results2$net.result
cor(predicted_strength2, concrete_test$strength) # higher than stated in book 0.801444583
[,1]
[1,] 0.9244533426
# try different activation function
# a more complex neural network topology with 5 hidden neurons
set.seed(12345) # to guarantee repeatable results
concrete_model2 <- neuralnet(strength ~ cement + slag +
ash + water + superplastic +
coarseagg + fineagg + age,
data = concrete_train, hidden = 5, act.fct = "tanh")
# evaluate the results as we did before
model_results2 <- compute(concrete_model2, concrete_test[1:8])
predicted_strength2 <- model_results2$net.result
cor(predicted_strength2, concrete_test$strength)
[,1]
[1,] 0.5741729322