This analysis is based on the high-level data set. There are 674 samples in the training data, 450 samples for validation, and 605 for testing. There are 21 input variables.

####load data, libraries and functions###################
load("~/PED/datasets/dataset_ 14 _prepared.RData")
load("~/PED/datasets/train_classification.RData")
load("~/PED/regressionWithBalanceData/trainAndValDataset_H.RData")
load("~/PED/classification-regression/test_classfication_prediction.RData")
library(ggplot2)
library(nnet)
##distribution of training dataset
# Histogram of the training targets. targetsTrainH is not created in this
# script; presumably it comes from one of the .RData files loaded above.
hist(targetsTrainH)

plot of chunk unnamed-chunk-1

###distribution of test data set
# Histogram of the test targets. targetsTest_high is not created in this
# script; presumably it comes from one of the .RData files loaded above.
hist(targetsTest_high)

plot of chunk unnamed-chunk-1

Bootstrapping function

####################ERRORS FUNCTIONS#############
source('~/functions/calculateErrors.R', echo=TRUE)
## 
## > modelErrors <- function(predicted, actual) {
## +     sal <- vector(mode = "numeric", length = 3)
## +     names(sal) <- c("MAE", "RMSE", "RELE")
## +     me .... [TRUNCATED] 
## 
## > modelsErrorsTotal <- 0
## 
## > allModelErrors <- function(models, inputsTest, targetsTest, 
## +     dataset) {
## +     error <- function(model) {
## +         pd <- predict(model, newdat .... [TRUNCATED]
##############NORMALIZATION FUNCTION#############################
source('~/functions/normalizationFunction.R', echo=TRUE)
## 
## > normalized <- function(x) {
## +     (x - min(x)) * 0.8/(max(x) - min(x)) + 0.1
## + }
###################BOOSTING SAMPLING###############
# Draw an error-weighted bootstrap resample of a training set.
#
# Arguments:
#   x     - inputs (data frame or matrix), one observation per row.
#   y     - targets, one value per row of x.
#   model - fitted model; it must support predict(model, newdata = x).
#
# Returns a data frame with nrow(x) rows drawn with replacement: the sampled
# input columns followed by the matching targets in column `Y`. The sampling
# weight of a row is its absolute prediction error rescaled by normalized(),
# so rows the model currently predicts badly are drawn more often.
boost.sample <- function(x, y, model) {
  pred <- predict(model, newdata = x)
  abs_err <- abs(y - pred)
  n_obs <- nrow(x)

  # normalized() divides by max - min. When every error is identical (or
  # there is a single row) that is 0/0, the weights become NaN and the
  # sampler aborts. Equal errors carry no preference, so sample uniformly.
  if (n_obs < 2L || isTRUE(max(abs_err) == min(abs_err))) {
    weights <- NULL
  } else {
    weights <- normalized(abs_err)
  }

  ##############based on the error to resample #################
  index <- sample.int(n_obs, size = n_obs, replace = TRUE, prob = weights)

  # drop = FALSE keeps a one-column input as a named column instead of
  # collapsing it to a bare vector.
  data.frame(x[index, , drop = FALSE], Y = y[index])
}
##################training nnet for each sampling iteration##############
##############plot the error for both training and validation data set#################
# Plot the per-iteration errors of a boosted nnet run on the training and
# validation sets.
#
# Arguments:
#   dataset    - label of the run; only used to build the output file name.
#   boostIter  - number of boosting iterations to score (>= 1).
#   targets    - training targets scored against the first iteration.
#   targetsVal - validation targets scored against every iteration.
#   size       - nnet size of the run; only used in the output file name.
#
# Reads three objects that are NOT arguments and must already exist in the
# calling environment (this script gets them via load()):
#   Y_pred   - list of training predictions, one element per iteration.
#   val_pred - list of validation predictions, one element per iteration.
#   samples  - list of resampled training sets; iteration i (i >= 2) is
#              scored against samples[[i - 1]]$Y.
#
# Side effect: saves the long-format error table `error_shape` to an .RData
# file in the working directory.
# Returns the ggplot object (error against iteration, coloured by data set,
# line type by error measure).
plotBoostingTrainingAndValNnet <- function(dataset, boostIter, targets, targetsVal, size = 5) {
  stopifnot(
    length(boostIter) == 1L, boostIter >= 1,
    length(Y_pred) >= boostIter,
    length(val_pred) >= boostIter,
    length(samples) >= boostIter - 1
  )
  error_names <- c("MAE", "RMSE", "RELE")
  iters <- seq_len(boostIter)

  # Score parallel lists of predictions and targets; one result row per
  # iteration. Building all rows first avoids growing a table with rbind()
  # inside a loop and also works when there is a single iteration.
  score <- function(predictions, actuals, label) {
    per_iter <- Map(
      function(pred, actual) modelErrors(as.data.frame(pred)[, ], actual),
      predictions, actuals
    )
    err <- do.call(rbind, unname(per_iter))
    colnames(err) <- error_names
    rownames(err) <- NULL
    data.frame(err, dataType = label, iter = seq_len(nrow(err)))
  }

  ############the errors of training set##################
  # Iteration 1 is scored against the original targets, iteration i >= 2
  # against the targets of the previous resample. seq_len() yields no extra
  # iterations when boostIter is 1 (2:boostIter would count down to 1).
  train_actuals <- c(
    list(targets),
    lapply(seq_len(boostIter - 1), function(i) samples[[i]]$Y)
  )
  error_train <- score(Y_pred[iters], train_actuals, "train")

  #####################the errors of validation set
  val_actuals <- rep(list(targetsVal), boostIter)
  error_val <- score(val_pred[iters], val_actuals, "validation")

  all_errors <- rbind(error_train, error_val)
  error_shape <- reshape(
    all_errors,
    times = error_names,
    timevar = "errorType",
    varying = list(error_names),
    v.names = "error",
    direction = "long"
  )

  # paste() uses its default separator (a space), so the file name contains
  # spaces; kept as is to match the other artefact names used by this script.
  save(error_shape, file = paste("dataset_", dataset, "_nnet_size_", size, "_error_shape.RData"))

  # One line per (error measure, data set) pair; the grouping is now named
  # explicitly instead of being passed as an unnamed aesthetic.
  ggplot(
    error_shape,
    aes(x = iter, y = error, group = interaction(errorType, dataType), colour = dataType)
  ) +
    geom_line(aes(linetype = errorType), size = 1) +
    geom_point(size = 2) +
    xlab("interation") +
    ylab("error")
}

The model is a neural network. The first plot shows the model trained with size 5 and the second one shows the model trained with size 8.

#####BOOSTING WITH NNET on training sets######################
# Reuse the cached size-5 run when both result files exist, otherwise train.
# The existence check and load() now use the same paths: previously the check
# looked in the working directory while load() read from the cache directory,
# so the two could disagree.
cache_dir <- "~/PED/regressionWithBalanceData"
samples_file <- file.path(cache_dir, "samples_predict_dataset H2_30_iter _nnet_size_ 5 .RData")
nnet_file <- file.path(cache_dir, "nnetFit_dataset_ H2_30_iter _nnet_size_ 5 .RData")
# && is the scalar, short-circuiting operator intended for if() conditions.
if (file.exists(samples_file) && file.exists(nnet_file)) {
  load(samples_file)
  load(nnet_file)
} else {
  boost.nnetFit.val(inputsTrainH2, targetsTrainH2, 30, "H2_30_iter", inputsValH2, size = 5)
}

##############plot the error for both training and validation data set#################
plotBoostingTrainingAndValNnet("H2_30_iter", 30, targetsTrainH2, targetsValH2, size = 5)

plot of chunk .3

###############size is 8############################################
# Reuse the cached size-8 run when both result files exist, otherwise train.
# The existence check and load() now use the same paths: previously the check
# looked in the working directory while load() read from the cache directory,
# so the two could disagree.
cache_dir <- "~/PED/regressionWithBalanceData"
samples_file <- file.path(cache_dir, "samples_predict_dataset H2_30_iter _nnet_size_ 8 .RData")
nnet_file <- file.path(cache_dir, "nnetFit_dataset_ H2_30_iter _nnet_size_ 8 .RData")
# && is the scalar, short-circuiting operator intended for if() conditions.
if (file.exists(samples_file) && file.exists(nnet_file)) {
  load(samples_file)
  load(nnet_file)
} else {
  boost.nnetFit.val(inputsTrainH2, targetsTrainH2, 30, "H2_30_iter", inputsValH2, size = 8)
}

plotBoostingTrainingAndValNnet("H2_30_iter", 30, targetsTrainH2, targetsValH2, size = 8)

plot of chunk .3

The black line is the predicted value while the red line is the real value. The first plot is for the training data set while the second plot is for the test data set.

# Training-set fit of the first boosting iteration of the size-5 run:
# predictions are drawn in black, the actual targets in red.
load("~/PED/regressionWithBalanceData/samples_predict_dataset H2_30_iter _nnet_size_ 5 .RData")
pred <- Y_pred[[1]]
# Scale the y axis from both series; by default plot() only considers the
# predictions, so targets outside that range would be clipped.
plot(pred[, ], type = "l", ylim = range(pred, targetsTrainH2, finite = TRUE))
lines(targetsTrainH2, col = "red")

plot of chunk .5

# Test-set fit of the first boosting iteration: predictions in black, actual
# targets in red.
# NOTE(review): only the size-5 samples_predict file is reloaded before the
# training plot; nnetFit is whatever was loaded or trained last, which looks
# like the size-8 run. Confirm which model this plot is meant to show.
pred_high <- predict(nnetFit[[1]], newdata = inputsTest_high)
# Scale the y axis from both series; by default plot() only considers the
# predictions, so targets outside that range would be clipped.
plot(pred_high, type = "l", ylim = range(pred_high, targetsTest_high, finite = TRUE))
lines(targetsTest_high, col = "red")

plot of chunk .5

errors

## 
## > modelErrors <- function(predicted, actual) {
## +     sal <- vector(mode = "numeric", length = 3)
## +     names(sal) <- c("MAE", "RMSE", "RELE")
## +     me .... [TRUNCATED] 
## 
## > modelsErrorsTotal <- 0
## 
## > allModelErrors <- function(models, inputsTest, targetsTest, 
## +     dataset) {
## +     error <- function(model) {
## +         pd <- predict(model, newdat .... [TRUNCATED]
##     MAE    RMSE    RELE 
## 0.05393 0.06869 0.12676
##     MAE    RMSE    RELE 
## 0.09732 0.12580 0.30310