This is the example code that follows along with this blog post.
Please see the blog post for the full explanation:
http://www.r-bloggers.com/fitting-a-neural-network-in-r-neuralnet-package/
set.seed(500)
library(MASS)
data <- Boston  # 506 census tracts; the target is medv, median home value in $1000s
apply(data, 2, function(x) sum(is.na(x)))  # count missing values in each column
##    crim      zn   indus    chas     nox      rm     age     dis     rad
##       0       0       0       0       0       0       0       0       0
##     tax ptratio   black   lstat    medv
##       0       0       0       0       0
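No values are missing, so nothing needs imputing. If some were, one minimal option (a sketch; not needed for the Boston data) would be to drop incomplete rows:
# data <- data[complete.cases(data), ]  # keep only rows with no NAs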
index <- sample(1:nrow(data), round(0.75*nrow(data)))  # random 75/25 train/test split
train <- data[index,]
test <- data[-index,]
lm.fit <- glm(medv~., data=train)  # a gaussian glm() is equivalent to lm() here, and the glm object works with boot::cv.glm() later
summary(lm.fit)
##
## Call:
## glm(formula = medv ~ ., data = train)
##
## Deviance Residuals:
##      Min        1Q    Median        3Q       Max
## -14.9143   -2.8607   -0.5244    1.5242   25.0004
##
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)  43.469681   6.099347   7.127 5.50e-12 ***
## crim         -0.105439   0.057095  -1.847 0.065596 .
## zn            0.044347   0.015974   2.776 0.005782 **
## indus         0.024034   0.071107   0.338 0.735556
## chas          2.596028   1.089369   2.383 0.017679 *
## nox         -22.336623   4.572254  -4.885 1.55e-06 ***
## rm            3.538957   0.472374   7.492 5.15e-13 ***
## age           0.016976   0.015088   1.125 0.261291
## dis          -1.570970   0.235280  -6.677 9.07e-11 ***
## rad           0.400502   0.085475   4.686 3.94e-06 ***
## tax          -0.015165   0.004599  -3.297 0.001072 **
## ptratio      -1.147046   0.155702  -7.367 1.17e-12 ***
## black         0.010338   0.003077   3.360 0.000862 ***
## lstat        -0.524957   0.056899  -9.226  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 23.26491)
##
##     Null deviance: 33642  on 379  degrees of freedom
## Residual deviance:  8515  on 366  degrees of freedom
## AIC: 2290
##
## Number of Fisher Scoring iterations: 2
pr.lm <- predict(lm.fit, test)
MSE.lm <- sum((pr.lm - test$medv)^2)/nrow(test)  # test-set mean squared error
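The same quantity can be written more directly with mean(); this is just an equivalent spelling, shown for reference:
# MSE.lm <- mean((pr.lm - test$medv)^2)  # identical to the sum/nrow form above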
maxs <- apply(data, 2, max)
mins <- apply(data, 2, min)
scaled <- as.data.frame(scale(data, center = mins, scale = maxs - mins))  # min-max scale every column to [0,1]
train_ <- scaled[index,]
test_ <- scaled[-index,]
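As a quick sanity check (an addition, not in the original post), every scaled column should now run from 0 to 1:
apply(scaled, 2, range)  # each column should report min 0 and max 1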
library(neuralnet)
## Loading required package: grid
n <- names(train_)
f <- as.formula(paste("medv ~", paste(n[!n %in% "medv"], collapse = " + ")))  # neuralnet() needs the predictors spelled out; the medv ~ . shorthand is not accepted
nn <- neuralnet(f, data=train_, hidden=c(5,3), linear.output=TRUE)  # two hidden layers of 5 and 3 neurons; linear output for regression
plot(nn)
Here is a plot of the network: the 13 inputs feed hidden layers of 5 and 3 neurons and a single medv output, with the fitted weights drawn on each connection.
pr.nn <- compute(nn, test_[,1:13])  # columns 1:13 are the predictors; column 14 is medv
pr.nn_ <- pr.nn$net.result*(max(data$medv)-min(data$medv))+min(data$medv)  # scale predictions back to the original units
test.r <- (test_$medv)*(max(data$medv)-min(data$medv))+min(data$medv)  # true values back on the original scale
MSE.nn <- sum((test.r - pr.nn_)^2)/nrow(test_)
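The two unscaling lines above can be factored into a small helper; unscale() here is a sketch of that idea, not a neuralnet function:
unscale <- function(x, v) x * (max(data[[v]]) - min(data[[v]])) + min(data[[v]])
# pr.nn_ <- unscale(pr.nn$net.result, "medv"); test.r <- unscale(test_$medv, "medv")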
print(paste(MSE.lm,MSE.nn))
## [1] "21.6297593507225 15.7518370200153"
par(mfrow=c(1,2))  # draw the two comparison plots side by side
plot(test$medv, pr.nn_, col='red', main='Real vs predicted NN', pch=18, cex=0.7)
abline(0,1,lwd=2)  # the y = x perfect-prediction line
legend('bottomright', legend='NN', pch=18, col='red', bty='n')
plot(test$medv, pr.lm, col='blue', main='Real vs predicted lm', pch=18, cex=0.7)
abline(0,1,lwd=2)
legend('bottomright', legend='LM', pch=18, col='blue', bty='n', cex=.95)
plot(test$medv, pr.nn_, col='red', main='Real vs predicted NN', pch=18, cex=0.7)  # both models overlaid on one plot
points(test$medv, pr.lm, col='blue', pch=18, cex=0.7)
abline(0,1,lwd=2)
legend('bottomright', legend=c('NN','LM'), pch=18, col=c('red','blue'))
library(boot)
set.seed(200)
lm.fit <- glm(medv~., data=data)  # refit the linear model on the full dataset for cross-validation
cv.glm(data, lm.fit, K=10)$delta[1]  # 10-fold cross-validated MSE
## [1] 23.83560156
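cv.glm() returns two values in delta: the raw cross-validation estimate of prediction error and a bias-corrected version; only the raw one is kept above.
# cv.glm(data, lm.fit, K=10)$delta  # c(raw estimate, bias-corrected estimate)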
set.seed(450)
cv.error <- NULL
k <- 10
library(plyr)
pbar <- create_progress_bar('text')  # text progress bar to track the CV loop
pbar$init(k)
for(i in 1:k){
  index <- sample(1:nrow(data), round(0.9*nrow(data)))  # fresh random 90/10 split each iteration
  train.cv <- scaled[index,]
  test.cv <- scaled[-index,]
  nn <- neuralnet(f, data=train.cv, hidden=c(5,2), linear.output=TRUE)
  pr.nn <- compute(nn, test.cv[,1:13])
  pr.nn <- pr.nn$net.result*(max(data$medv)-min(data$medv))+min(data$medv)  # back to the original scale
  test.cv.r <- (test.cv$medv)*(max(data$medv)-min(data$medv))+min(data$medv)
  cv.error[i] <- sum((test.cv.r - pr.nn)^2)/nrow(test.cv)
  pbar$step()
}
## (text progress bar output, advancing from 10% to 100%, omitted)
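Note that each pass draws a fresh random 90/10 split, so this is repeated random subsampling rather than strict k-fold cross-validation with disjoint folds. A strict version could assign folds up front, for example (a sketch):
# folds <- sample(rep(1:k, length.out = nrow(data)))  # disjoint fold labels
# test.cv <- scaled[folds == i, ]; train.cv <- scaled[folds != i, ]  # inside the loop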
mean(cv.error)
## [1] 10.32697995
cv.error
##  [1] 17.640652805  6.310575067 15.769518577  5.730130820 10.520947119
##  [6]  6.121160840  6.389967211  8.004786424 17.369282494  9.412778105
boxplot(cv.error, xlab='MSE CV', col='cyan',
        border='blue', names='CV error (MSE)',
        main='CV error (MSE) for NN', horizontal=TRUE)
Thank you for following along, and thank you to Michy Alice for the original post: http://www.r-bloggers.com/fitting-a-neural-network-in-r-neuralnet-package/