In this post, I will use a simple neural network to predict concrete strength, using data from the [UCI Machine Learning Repository](http://archive.ics.uci.edu/ml/).
#setwd("C:/Users/Maxwell/Desktop/Learn R")
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
# Read the concrete mix data from the working directory and show the type
# and first few values of each column.
concrete <- read.csv(file = "concrete.csv")
str(concrete)
## 'data.frame': 1030 obs. of 9 variables:
## $ cement : num 540 540 332 332 199 ...
## $ slag : num 0 0 142 142 132 ...
## $ ash : num 0 0 0 0 0 0 0 0 0 0 ...
## $ water : num 162 162 228 228 192 228 228 228 228 228 ...
## $ superplastic: num 2.5 2.5 0 0 0 0 0 0 0 0 ...
## $ coarseagg : num 1040 1055 932 932 978 ...
## $ fineagg : num 676 676 594 594 826 ...
## $ age : int 28 28 270 365 360 90 365 28 28 28 ...
## $ strength : num 80 61.9 40.3 41 44.3 ...
# Per-column minimum, quartiles, mean and maximum.
summary(concrete)
## cement slag ash water
## Min. :102.0 Min. : 0.0 Min. : 0.00 Min. :121.8
## 1st Qu.:192.4 1st Qu.: 0.0 1st Qu.: 0.00 1st Qu.:164.9
## Median :272.9 Median : 22.0 Median : 0.00 Median :185.0
## Mean :281.2 Mean : 73.9 Mean : 54.19 Mean :181.6
## 3rd Qu.:350.0 3rd Qu.:142.9 3rd Qu.:118.30 3rd Qu.:192.0
## Max. :540.0 Max. :359.4 Max. :200.10 Max. :247.0
## superplastic coarseagg fineagg age
## Min. : 0.000 Min. : 801.0 Min. :594.0 Min. : 1.00
## 1st Qu.: 0.000 1st Qu.: 932.0 1st Qu.:731.0 1st Qu.: 7.00
## Median : 6.400 Median : 968.0 Median :779.5 Median : 28.00
## Mean : 6.205 Mean : 972.9 Mean :773.6 Mean : 45.66
## 3rd Qu.:10.200 3rd Qu.:1029.4 3rd Qu.:824.0 3rd Qu.: 56.00
## Max. :32.200 Max. :1145.0 Max. :992.6 Max. :365.00
## strength
## Min. : 2.33
## 1st Qu.:23.71
## Median :34.45
## Mean :35.82
## 3rd Qu.:46.13
## Max. :82.60
# Rescale a numeric vector linearly onto [0, 1] (min-max normalization).
#
# Args:
#   data:  numeric vector to rescale.
#   na.rm: if TRUE, NA values are ignored when finding the minimum and
#          maximum, and NA elements stay NA in the result. Defaults to FALSE,
#          in which case any NA in `data` makes the whole result NA.
#
# Returns:
#   A numeric vector the same length as `data`, with the smallest value
#   mapped to 0 and the largest mapped to 1. A constant vector maps to zeros.
normalize <- function(data, na.rm = FALSE) {
  bounds <- range(data, na.rm = na.rm)
  span <- bounds[2] - bounds[1]
  # A constant vector has zero span; dividing by it would turn every element
  # into NaN, so divide by 1 instead, which yields zeros.
  if (isTRUE(span == 0)) {
    span <- 1
  }
  (data - bounds[1]) / span
}
# Normalize every column and keep the result as a data frame. sapply() would
# collapse it into a numeric matrix, whereas neuralnet() documents its `data`
# argument as a data frame.
concrete_n <- as.data.frame(lapply(concrete, normalize))
summary(concrete_n)
## cement slag ash water
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.2063 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.3442
## Median :0.3902 Median :0.06121 Median :0.0000 Median :0.5048
## Mean :0.4091 Mean :0.20561 Mean :0.2708 Mean :0.4774
## 3rd Qu.:0.5662 3rd Qu.:0.39775 3rd Qu.:0.5912 3rd Qu.:0.5607
## Max. :1.0000 Max. :1.00000 Max. :1.0000 Max. :1.0000
## superplastic coarseagg fineagg age
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.3808 1st Qu.:0.3436 1st Qu.:0.01648
## Median :0.1988 Median :0.4855 Median :0.4654 Median :0.07418
## Mean :0.1927 Mean :0.4998 Mean :0.4505 Mean :0.12270
## 3rd Qu.:0.3168 3rd Qu.:0.6640 3rd Qu.:0.5770 3rd Qu.:0.15110
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## strength
## Min. :0.0000
## 1st Qu.:0.2664
## Median :0.4001
## Mean :0.4172
## 3rd Qu.:0.5457
## Max. :1.0000
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
# Fix the RNG seed so the random train/test split, and anything random that
# runs after it, is the same on every run. Figures recorded from an earlier,
# unseeded run will not match exactly.
set.seed(123)
# Put 80% of the rows in the training set (partitioned on the raw strength
# values) and keep the remaining rows for testing.
index <- createDataPartition(concrete$strength, p = 0.8, list = FALSE)
train_con <- concrete_n[index, ]
test_con <- concrete_n[-index, ]
#install.packages("neuralnet")
library(neuralnet)
## Warning: package 'neuralnet' was built under R version 3.3.3
##
## Attaching package: 'neuralnet'
## The following object is masked from 'package:dplyr':
##
## compute
library(neuralnet)
# Fit a network with one hidden node on the training rows. Each predictor is
# written out in the formula because the `.` shorthand for "all other
# columns" could not be used here.
nns <- neuralnet(
  formula = strength ~ cement + slag + ash + water + superplastic +
    coarseagg + fineagg + age,
  data = train_con,
  hidden = 1
)
# Draw the fitted network.
plot(nns)
# Predict on the held-out rows. Only the eight predictor columns are passed
# in; the target (column 9) must be left out. The call is namespace-qualified
# because dplyr also defines a compute().
nn_pred <- neuralnet::compute(nns, test_con[, 1:8])
# Average squared difference between predicted and actual target values, both
# on the normalized scale.
std_nn <- mean((nn_pred$net.result - test_con[, 9])^2)
# the mean squared error is
std_nn
## [1] 0.01244459908
# correlation between the predicted and the actual test values
cor(nn_pred$net.result, test_con[, 9])
## [,1]
## [1,] 0.8500873981