Loading the dataset

Concrete <- read_xls("Concrete_Data.xls")

Data Exploration

names(Concrete)
## [1] "Cement"   "Blast"    "Ash"      "Water"    "Super"    "Coarse"   "Fine"    
## [8] "Age"      "Strength"
glimpse(Concrete)
## Rows: 1,030
## Columns: 9
## $ Cement   <dbl> 540.0, 540.0, 332.5, 332.5, 198.6, 266.0, 380.0, 380.0, 266.0…
## $ Blast    <dbl> 0.0, 0.0, 142.5, 142.5, 132.4, 114.0, 95.0, 95.0, 114.0, 0.0,…
## $ Ash      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Water    <dbl> 162, 162, 228, 228, 192, 228, 228, 228, 228, 228, 192, 192, 2…
## $ Super    <dbl> 2.5, 2.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0…
## $ Coarse   <dbl> 1040.0, 1055.0, 932.0, 932.0, 978.4, 932.0, 932.0, 932.0, 932…
## $ Fine     <dbl> 676.0, 676.0, 594.0, 594.0, 825.5, 670.0, 594.0, 594.0, 670.0…
## $ Age      <dbl> 28, 28, 270, 365, 360, 90, 365, 28, 28, 28, 90, 28, 270, 90, …
## $ Strength <dbl> 79.986111, 61.887366, 40.269535, 41.052780, 44.296075, 47.029…
str(Concrete)
## tibble [1,030 × 9] (S3: tbl_df/tbl/data.frame)
##  $ Cement  : num [1:1030] 540 540 332 332 199 ...
##  $ Blast   : num [1:1030] 0 0 142 142 132 ...
##  $ Ash     : num [1:1030] 0 0 0 0 0 0 0 0 0 0 ...
##  $ Water   : num [1:1030] 162 162 228 228 192 228 228 228 228 228 ...
##  $ Super   : num [1:1030] 2.5 2.5 0 0 0 0 0 0 0 0 ...
##  $ Coarse  : num [1:1030] 1040 1055 932 932 978 ...
##  $ Fine    : num [1:1030] 676 676 594 594 826 ...
##  $ Age     : num [1:1030] 28 28 270 365 360 90 365 28 28 28 ...
##  $ Strength: num [1:1030] 80 61.9 40.3 41.1 44.3 ...
summary(Concrete)
##      Cement          Blast            Ash             Water      
##  Min.   :102.0   Min.   :  0.0   Min.   :  0.00   Min.   :121.8  
##  1st Qu.:192.4   1st Qu.:  0.0   1st Qu.:  0.00   1st Qu.:164.9  
##  Median :272.9   Median : 22.0   Median :  0.00   Median :185.0  
##  Mean   :281.2   Mean   : 73.9   Mean   : 54.19   Mean   :181.6  
##  3rd Qu.:350.0   3rd Qu.:142.9   3rd Qu.:118.27   3rd Qu.:192.0  
##  Max.   :540.0   Max.   :359.4   Max.   :200.10   Max.   :247.0  
##      Super            Coarse            Fine            Age        
##  Min.   : 0.000   Min.   : 801.0   Min.   :594.0   Min.   :  1.00  
##  1st Qu.: 0.000   1st Qu.: 932.0   1st Qu.:731.0   1st Qu.:  7.00  
##  Median : 6.350   Median : 968.0   Median :779.5   Median : 28.00  
##  Mean   : 6.203   Mean   : 972.9   Mean   :773.6   Mean   : 45.66  
##  3rd Qu.:10.160   3rd Qu.:1029.4   3rd Qu.:824.0   3rd Qu.: 56.00  
##  Max.   :32.200   Max.   :1145.0   Max.   :992.6   Max.   :365.00  
##     Strength     
##  Min.   : 2.332  
##  1st Qu.:23.707  
##  Median :34.443  
##  Mean   :35.818  
##  3rd Qu.:46.136  
##  Max.   :82.599
describe(Concrete)
##          vars    n   mean     sd median trimmed    mad    min    max  range
## Cement      1 1030 281.17 104.51 272.90  273.47 117.72 102.00  540.0 438.00
## Blast       2 1030  73.90  86.28  22.00   62.43  32.62   0.00  359.4 359.40
## Ash         3 1030  54.19  64.00   0.00   46.85   0.00   0.00  200.1 200.10
## Water       4 1030 181.57  21.36 185.00  181.19  19.27 121.75  247.0 125.25
## Super       5 1030   6.20   5.97   6.35    5.56   7.87   0.00   32.2  32.20
## Coarse      6 1030 972.92  77.75 968.00  973.49  68.64 801.00 1145.0 344.00
## Fine        7 1030 773.58  80.18 779.51  776.41  67.44 594.00  992.6 398.60
## Age         8 1030  45.66  63.17  28.00   32.53  31.13   1.00  365.0 364.00
## Strength    9 1030  35.82  16.71  34.44   34.96  16.20   2.33   82.6  80.27
##           skew kurtosis   se
## Cement    0.51    -0.53 3.26
## Blast     0.80    -0.52 2.69
## Ash       0.54    -1.33 1.99
## Water     0.07     0.11 0.67
## Super     0.91     1.39 0.19
## Coarse   -0.04    -0.61 2.42
## Fine     -0.25    -0.11 2.50
## Age       3.26    12.07 1.97
## Strength  0.42    -0.32 0.52
pairs.panels(Concrete[c("Cement", "Blast", "Ash", "Water", "Super", "Coarse", "Fine", "Age")], pch = ",")

normalize the variables

normalize <- function(x){
  return((x - min(x)) / (max(x) - min(x)))
  
}

Concrete_nrom <- as.data.frame(lapply(Concrete, normalize))
### checking the summary of the normalized data
summary(Concrete_nrom$Strength)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.2663  0.4000  0.4172  0.5457  1.0000

sharing the data in ratio 80:20

set.seed(1234)
Concrete_train <- Concrete_nrom[1:824, ]
Concrete_test <- Concrete_nrom[825:1030, ]

training the model

set.seed(1234)

Concrete_model <- neuralnet(Strength ~ ., data = Concrete_train)

Neural Network Output

Concrete_model$result.matrix
##                                 [,1]
## error                    5.974301030
## reached.threshold        0.008517512
## steps                 1171.000000000
## Intercept.to.1layhid1   -1.120440451
## Cement.to.1layhid1       3.333831269
## Blast.to.1layhid1        1.571717248
## Ash.to.1layhid1          0.441427814
## Water.to.1layhid1       -2.109621018
## Super.to.1layhid1        2.478641558
## Coarse.to.1layhid1      -0.159193672
## Fine.to.1layhid1        -1.308452339
## Age.to.1layhid1         12.203367534
## Intercept.to.Strength    0.034460406
## 1layhid1.to.Strength     0.631388471
### Generated Neural Network
plot(Concrete_model)

evaluating model performance with the test data

model_results <- compute(Concrete_model, Concrete_test[1:8])
pred_strength <- model_results$net.result

correlation

cor(pred_strength, Concrete_test$Strength)
##           [,1]
## [1,] 0.7050092

improving the model with hidden layers

set.seed(1234)

concrete_model2 <-  neuralnet(Strength ~ Cement + Blast + Ash + Water + Super + Coarse + Fine + Age, data = Concrete_train, hidden = c(2,1))


### plotting the network 
plot(concrete_model2)

correlation

model_result2 <- compute(concrete_model2, Concrete_test[1:8])

pred_strength2 <- model_result2$net.result

### correlation 
cor(pred_strength2, Concrete_test$Strength)
##          [,1]
## [1,] 0.742509

improving the model with activation function

set.seed(2222)

Concrete_model3 <- neuralnet(Strength ~ Cement + Blast + Ash  + Water + Super + Coarse + Fine + Age, data = Concrete_train, hidden = c(5,5), act.fct="tanh", threshold=0.04,linear.output=TRUE, stepmax=1e7)


Concrete_model3$result.matrix
##                                [,1]
## error                    1.72419269
## reached.threshold        0.03715269
## steps                 3238.00000000
## Intercept.to.1layhid1   -0.37908377
## Cement.to.1layhid1       0.33878857
## Blast.to.1layhid1        0.23946532
## Ash.to.1layhid1         -0.08210901
## Water.to.1layhid1        0.78891029
## Super.to.1layhid1        0.02648731
## Coarse.to.1layhid1       0.15362699
## Fine.to.1layhid1         0.24392552
## Age.to.1layhid1         10.34146339
## Intercept.to.1layhid2    1.30549934
## Cement.to.1layhid2       1.91455553
## Blast.to.1layhid2        1.29929610
## Ash.to.1layhid2          1.63131184
## Water.to.1layhid2       -2.20966217
## Super.to.1layhid2        0.10918505
## Coarse.to.1layhid2       0.12556970
## Fine.to.1layhid2        -0.74298122
## Age.to.1layhid2         -0.37938538
## Intercept.to.1layhid3    0.53985668
## Cement.to.1layhid3       0.72886993
## Blast.to.1layhid3        0.24270048
## Ash.to.1layhid3          0.31396228
## Water.to.1layhid3       -2.12567240
## Super.to.1layhid3        0.22320431
## Coarse.to.1layhid3      -0.26389784
## Fine.to.1layhid3        -0.68837957
## Age.to.1layhid3         -0.07133121
## Intercept.to.1layhid4   -0.26007634
## Cement.to.1layhid4      -0.69658366
## Blast.to.1layhid4       -0.75638914
## Ash.to.1layhid4         -0.09188597
## Water.to.1layhid4        0.22170910
## Super.to.1layhid4        0.02679513
## Coarse.to.1layhid4      -0.04597310
## Fine.to.1layhid4         0.06932040
## Age.to.1layhid4         -0.56702389
## Intercept.to.1layhid5    0.91047078
## Cement.to.1layhid5       1.28029854
## Blast.to.1layhid5        0.71881025
## Ash.to.1layhid5         57.10891025
## Water.to.1layhid5       -0.18515380
## Super.to.1layhid5      133.89285460
## Coarse.to.1layhid5       0.10698581
## Fine.to.1layhid5        -0.22170893
## Age.to.1layhid5         -0.57871609
## Intercept.to.2layhid1   -0.22292695
## 1layhid1.to.2layhid1    -1.00658213
## 1layhid2.to.2layhid1     1.06488005
## 1layhid3.to.2layhid1    -0.83060975
## 1layhid4.to.2layhid1    -0.65282599
## 1layhid5.to.2layhid1    -0.56630873
## Intercept.to.2layhid2   -0.60430672
## 1layhid1.to.2layhid2    -0.22625382
## 1layhid2.to.2layhid2    -0.78741836
## 1layhid3.to.2layhid2    -1.04416324
## 1layhid4.to.2layhid2    -0.93092397
## 1layhid5.to.2layhid2     0.42359886
## Intercept.to.2layhid3   -1.18555842
## 1layhid1.to.2layhid3    -1.59491053
## 1layhid2.to.2layhid3     0.14261232
## 1layhid3.to.2layhid3    -0.77054267
## 1layhid4.to.2layhid3    -2.43380383
## 1layhid5.to.2layhid3     1.03710445
## Intercept.to.2layhid4    1.45158015
## 1layhid1.to.2layhid4     2.06658328
## 1layhid2.to.2layhid4     1.76656251
## 1layhid3.to.2layhid4     3.11124414
## 1layhid4.to.2layhid4     0.41660409
## 1layhid5.to.2layhid4     0.23630127
## Intercept.to.2layhid5    0.05846175
## 1layhid1.to.2layhid5     0.24276429
## 1layhid2.to.2layhid5     0.53500664
## 1layhid3.to.2layhid5    -0.63031248
## 1layhid4.to.2layhid5     0.07681752
## 1layhid5.to.2layhid5    -0.29602929
## Intercept.to.Strength   -0.79948815
## 2layhid1.to.Strength    -1.45338674
## 2layhid2.to.Strength    -0.56262610
## 2layhid3.to.Strength     0.98557584
## 2layhid4.to.Strength     0.17517669
## 2layhid5.to.Strength     1.40744755

plotting the network

plot(Concrete_model3)
model_results3 <- compute(Concrete_model3, Concrete_test[1:8])
pred_strength3 <- model_results3$net.result

correlation between the predicted and actual strength

cor(pred_strength3, Concrete_test$Strength)
##           [,1]
## [1,] 0.7570695

This is a much better improvement than we previously had.

Converting to actual values and the corresponding predictions side by side

strengths <- data.frame(actual <- Concrete$Strength[825:1030],
                        pred <- pred_strength3)

head(strengths)
##     actual....Concrete.Strength.825.1030. pred....pred_strength3
## 825                              21.75297              0.3188331
## 826                              39.09329              0.4535401
## 827                              24.39366              0.3083154
## 828                              50.51101              0.5018552
## 829                              74.98741              0.7086625
## 830                              37.16965              0.4508552

correlation of prediction and actual

cor(strengths$pred,strengths$actual)
## [1] 0.7570695

unnormalize the data

unnormalize <- function(x){
  return(x * (max(Concrete$Strength) -
                min(Concrete$Strength)) + min(Concrete$Strength))
}



strengths$pred_new <- unnormalize(strengths$pred)

strengths$error_pct <- (strengths$pred_new - strengths$actual)/strengths$actual

### checking the first 4 predictions
head(strengths, n = 4)
##     actual....Concrete.Strength.825.1030. pred....pred_strength3 pred_new
## 825                              21.75297              0.3188331 27.92372
## 826                              39.09329              0.4535401 38.73630
## 827                              24.39366              0.3083154 27.07949
## 828                              50.51101              0.5018552 42.61443
##        error_pct
## 825  0.283673995
## 826 -0.009131728
## 827  0.110103540
## 828 -0.156333872

The correlation remains the same

cor(strengths$pred_new, strengths$actual)
## [1] 0.7570695