Loading the dataset
Concrete <- read_xls("Concrete_Data.xls")
Data Exploration
names(Concrete)
## [1] "Cement" "Blast" "Ash" "Water" "Super" "Coarse" "Fine"
## [8] "Age" "Strength"
glimpse(Concrete)
## Rows: 1,030
## Columns: 9
## $ Cement <dbl> 540.0, 540.0, 332.5, 332.5, 198.6, 266.0, 380.0, 380.0, 266.0…
## $ Blast <dbl> 0.0, 0.0, 142.5, 142.5, 132.4, 114.0, 95.0, 95.0, 114.0, 0.0,…
## $ Ash <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Water <dbl> 162, 162, 228, 228, 192, 228, 228, 228, 228, 228, 192, 192, 2…
## $ Super <dbl> 2.5, 2.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0…
## $ Coarse <dbl> 1040.0, 1055.0, 932.0, 932.0, 978.4, 932.0, 932.0, 932.0, 932…
## $ Fine <dbl> 676.0, 676.0, 594.0, 594.0, 825.5, 670.0, 594.0, 594.0, 670.0…
## $ Age <dbl> 28, 28, 270, 365, 360, 90, 365, 28, 28, 28, 90, 28, 270, 90, …
## $ Strength <dbl> 79.986111, 61.887366, 40.269535, 41.052780, 44.296075, 47.029…
str(Concrete)
## tibble [1,030 × 9] (S3: tbl_df/tbl/data.frame)
## $ Cement : num [1:1030] 540 540 332 332 199 ...
## $ Blast : num [1:1030] 0 0 142 142 132 ...
## $ Ash : num [1:1030] 0 0 0 0 0 0 0 0 0 0 ...
## $ Water : num [1:1030] 162 162 228 228 192 228 228 228 228 228 ...
## $ Super : num [1:1030] 2.5 2.5 0 0 0 0 0 0 0 0 ...
## $ Coarse : num [1:1030] 1040 1055 932 932 978 ...
## $ Fine : num [1:1030] 676 676 594 594 826 ...
## $ Age : num [1:1030] 28 28 270 365 360 90 365 28 28 28 ...
## $ Strength: num [1:1030] 80 61.9 40.3 41.1 44.3 ...
summary(Concrete)
## Cement Blast Ash Water
## Min. :102.0 Min. : 0.0 Min. : 0.00 Min. :121.8
## 1st Qu.:192.4 1st Qu.: 0.0 1st Qu.: 0.00 1st Qu.:164.9
## Median :272.9 Median : 22.0 Median : 0.00 Median :185.0
## Mean :281.2 Mean : 73.9 Mean : 54.19 Mean :181.6
## 3rd Qu.:350.0 3rd Qu.:142.9 3rd Qu.:118.27 3rd Qu.:192.0
## Max. :540.0 Max. :359.4 Max. :200.10 Max. :247.0
## Super Coarse Fine Age
## Min. : 0.000 Min. : 801.0 Min. :594.0 Min. : 1.00
## 1st Qu.: 0.000 1st Qu.: 932.0 1st Qu.:731.0 1st Qu.: 7.00
## Median : 6.350 Median : 968.0 Median :779.5 Median : 28.00
## Mean : 6.203 Mean : 972.9 Mean :773.6 Mean : 45.66
## 3rd Qu.:10.160 3rd Qu.:1029.4 3rd Qu.:824.0 3rd Qu.: 56.00
## Max. :32.200 Max. :1145.0 Max. :992.6 Max. :365.00
## Strength
## Min. : 2.332
## 1st Qu.:23.707
## Median :34.443
## Mean :35.818
## 3rd Qu.:46.136
## Max. :82.599
describe(Concrete)
## vars n mean sd median trimmed mad min max range
## Cement 1 1030 281.17 104.51 272.90 273.47 117.72 102.00 540.0 438.00
## Blast 2 1030 73.90 86.28 22.00 62.43 32.62 0.00 359.4 359.40
## Ash 3 1030 54.19 64.00 0.00 46.85 0.00 0.00 200.1 200.10
## Water 4 1030 181.57 21.36 185.00 181.19 19.27 121.75 247.0 125.25
## Super 5 1030 6.20 5.97 6.35 5.56 7.87 0.00 32.2 32.20
## Coarse 6 1030 972.92 77.75 968.00 973.49 68.64 801.00 1145.0 344.00
## Fine 7 1030 773.58 80.18 779.51 776.41 67.44 594.00 992.6 398.60
## Age 8 1030 45.66 63.17 28.00 32.53 31.13 1.00 365.0 364.00
## Strength 9 1030 35.82 16.71 34.44 34.96 16.20 2.33 82.6 80.27
## skew kurtosis se
## Cement 0.51 -0.53 3.26
## Blast 0.80 -0.52 2.69
## Ash 0.54 -1.33 1.99
## Water 0.07 0.11 0.67
## Super 0.91 1.39 0.19
## Coarse -0.04 -0.61 2.42
## Fine -0.25 -0.11 2.50
## Age 3.26 12.07 1.97
## Strength 0.42 -0.32 0.52
pairs.panels(Concrete[c("Cement", "Blast", "Ash", "Water", "Super", "Coarse", "Fine", "Age")], pch = ",")

normalize the variables
normalize <- function(x){
return((x - min(x)) / (max(x) - min(x)))
}
Concrete_nrom <- as.data.frame(lapply(Concrete, normalize))
### checking the summary of the normalized data
summary(Concrete_nrom$Strength)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.2663 0.4000 0.4172 0.5457 1.0000
sharing the data in ratio 80:20
set.seed(1234)
Concrete_train <- Concrete_nrom[1:824, ]
Concrete_test <- Concrete_nrom[825:1030, ]
training the model
set.seed(1234)
Concrete_model <- neuralnet(Strength ~ ., data = Concrete_train)
Neural Network Output
Concrete_model$result.matrix
## [,1]
## error 5.974301030
## reached.threshold 0.008517512
## steps 1171.000000000
## Intercept.to.1layhid1 -1.120440451
## Cement.to.1layhid1 3.333831269
## Blast.to.1layhid1 1.571717248
## Ash.to.1layhid1 0.441427814
## Water.to.1layhid1 -2.109621018
## Super.to.1layhid1 2.478641558
## Coarse.to.1layhid1 -0.159193672
## Fine.to.1layhid1 -1.308452339
## Age.to.1layhid1 12.203367534
## Intercept.to.Strength 0.034460406
## 1layhid1.to.Strength 0.631388471
### Generated Neural Network
plot(Concrete_model)
correlation
cor(pred_strength, Concrete_test$Strength)
## [,1]
## [1,] 0.7050092
improving the model with hidden layers
set.seed(1234)
concrete_model2 <- neuralnet(Strength ~ Cement + Blast + Ash + Water + Super + Coarse + Fine + Age, data = Concrete_train, hidden = c(2,1))
### plotting the network
plot(concrete_model2)
correlation
model_result2 <- compute(concrete_model2, Concrete_test[1:8])
pred_strength2 <- model_result2$net.result
### correlation
cor(pred_strength2, Concrete_test$Strength)
## [,1]
## [1,] 0.742509
improving the model with activation function
set.seed(2222)
Concrete_model3 <- neuralnet(Strength ~ Cement + Blast + Ash + Water + Super + Coarse + Fine + Age, data = Concrete_train, hidden = c(5,5), act.fct="tanh", threshold=0.04,linear.output=TRUE, stepmax=1e7)
Concrete_model3$result.matrix
## [,1]
## error 1.72419269
## reached.threshold 0.03715269
## steps 3238.00000000
## Intercept.to.1layhid1 -0.37908377
## Cement.to.1layhid1 0.33878857
## Blast.to.1layhid1 0.23946532
## Ash.to.1layhid1 -0.08210901
## Water.to.1layhid1 0.78891029
## Super.to.1layhid1 0.02648731
## Coarse.to.1layhid1 0.15362699
## Fine.to.1layhid1 0.24392552
## Age.to.1layhid1 10.34146339
## Intercept.to.1layhid2 1.30549934
## Cement.to.1layhid2 1.91455553
## Blast.to.1layhid2 1.29929610
## Ash.to.1layhid2 1.63131184
## Water.to.1layhid2 -2.20966217
## Super.to.1layhid2 0.10918505
## Coarse.to.1layhid2 0.12556970
## Fine.to.1layhid2 -0.74298122
## Age.to.1layhid2 -0.37938538
## Intercept.to.1layhid3 0.53985668
## Cement.to.1layhid3 0.72886993
## Blast.to.1layhid3 0.24270048
## Ash.to.1layhid3 0.31396228
## Water.to.1layhid3 -2.12567240
## Super.to.1layhid3 0.22320431
## Coarse.to.1layhid3 -0.26389784
## Fine.to.1layhid3 -0.68837957
## Age.to.1layhid3 -0.07133121
## Intercept.to.1layhid4 -0.26007634
## Cement.to.1layhid4 -0.69658366
## Blast.to.1layhid4 -0.75638914
## Ash.to.1layhid4 -0.09188597
## Water.to.1layhid4 0.22170910
## Super.to.1layhid4 0.02679513
## Coarse.to.1layhid4 -0.04597310
## Fine.to.1layhid4 0.06932040
## Age.to.1layhid4 -0.56702389
## Intercept.to.1layhid5 0.91047078
## Cement.to.1layhid5 1.28029854
## Blast.to.1layhid5 0.71881025
## Ash.to.1layhid5 57.10891025
## Water.to.1layhid5 -0.18515380
## Super.to.1layhid5 133.89285460
## Coarse.to.1layhid5 0.10698581
## Fine.to.1layhid5 -0.22170893
## Age.to.1layhid5 -0.57871609
## Intercept.to.2layhid1 -0.22292695
## 1layhid1.to.2layhid1 -1.00658213
## 1layhid2.to.2layhid1 1.06488005
## 1layhid3.to.2layhid1 -0.83060975
## 1layhid4.to.2layhid1 -0.65282599
## 1layhid5.to.2layhid1 -0.56630873
## Intercept.to.2layhid2 -0.60430672
## 1layhid1.to.2layhid2 -0.22625382
## 1layhid2.to.2layhid2 -0.78741836
## 1layhid3.to.2layhid2 -1.04416324
## 1layhid4.to.2layhid2 -0.93092397
## 1layhid5.to.2layhid2 0.42359886
## Intercept.to.2layhid3 -1.18555842
## 1layhid1.to.2layhid3 -1.59491053
## 1layhid2.to.2layhid3 0.14261232
## 1layhid3.to.2layhid3 -0.77054267
## 1layhid4.to.2layhid3 -2.43380383
## 1layhid5.to.2layhid3 1.03710445
## Intercept.to.2layhid4 1.45158015
## 1layhid1.to.2layhid4 2.06658328
## 1layhid2.to.2layhid4 1.76656251
## 1layhid3.to.2layhid4 3.11124414
## 1layhid4.to.2layhid4 0.41660409
## 1layhid5.to.2layhid4 0.23630127
## Intercept.to.2layhid5 0.05846175
## 1layhid1.to.2layhid5 0.24276429
## 1layhid2.to.2layhid5 0.53500664
## 1layhid3.to.2layhid5 -0.63031248
## 1layhid4.to.2layhid5 0.07681752
## 1layhid5.to.2layhid5 -0.29602929
## Intercept.to.Strength -0.79948815
## 2layhid1.to.Strength -1.45338674
## 2layhid2.to.Strength -0.56262610
## 2layhid3.to.Strength 0.98557584
## 2layhid4.to.Strength 0.17517669
## 2layhid5.to.Strength 1.40744755
plotting the network
plot(Concrete_model3)
model_results3 <- compute(Concrete_model3, Concrete_test[1:8])
pred_strength3 <- model_results3$net.result
correlation between the predicted and actual strength
cor(pred_strength3, Concrete_test$Strength)
## [,1]
## [1,] 0.7570695
This is a much better improvement than we previously had.
Converting to actual values and the corresponding predictions side
by side
strengths <- data.frame(actual <- Concrete$Strength[825:1030],
pred <- pred_strength3)
head(strengths)
## actual....Concrete.Strength.825.1030. pred....pred_strength3
## 825 21.75297 0.3188331
## 826 39.09329 0.4535401
## 827 24.39366 0.3083154
## 828 50.51101 0.5018552
## 829 74.98741 0.7086625
## 830 37.16965 0.4508552
correlation of prediction and actual
cor(strengths$pred,strengths$actual)
## [1] 0.7570695
unnormalize the data
unnormalize <- function(x){
return(x * (max(Concrete$Strength) -
min(Concrete$Strength)) + min(Concrete$Strength))
}
strengths$pred_new <- unnormalize(strengths$pred)
strengths$error_pct <- (strengths$pred_new - strengths$actual)/strengths$actual
### checking the first 4 predictions
head(strengths, n = 4)
## actual....Concrete.Strength.825.1030. pred....pred_strength3 pred_new
## 825 21.75297 0.3188331 27.92372
## 826 39.09329 0.4535401 38.73630
## 827 24.39366 0.3083154 27.07949
## 828 50.51101 0.5018552 42.61443
## error_pct
## 825 0.283673995
## 826 -0.009131728
## 827 0.110103540
## 828 -0.156333872
The correlation remains the same
cor(strengths$pred_new, strengths$actual)
## [1] 0.7570695