Use the linear ensemble classifier to classify targetsTest into the “H” or “L” level.

library(xtable)
source('~/PED/regression/regression_caretEnsemble_functions.R', echo=TRUE)
## 
## > library(doParallel)
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
## 
## > library(survival)
## Loading required package: splines
## 
## > library(splines)
## 
## > library(lattice)
## 
## > library(gbm)
## Loaded gbm 2.1
## 
## > library(methods)
## 
## > library(kernlab)
## 
## > library(MASS)
## 
## > library(caret)
## Loading required package: ggplot2
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:survival':
## 
##     cluster
## 
## > library(ggplot2)
## 
## > library(corrplot)
## 
## > library(pbapply)
## 
## > library(testthat)
## 
## > library(devtools)
## 
## > library(caretEnsemble)
## 
## > library(doMC)
## 
## > library(foreach)
## 
## > registerDoMC(cores = 5)
## 
## > denormalized <- function(y, output) {
## +     ((y - 0.1) * (max(output) - min(output))/0.8) + min(output)
## + }
## 
## > modelErrors <- function(predicted, actual) {
## +     sal <- vector(mode = "numeric", length = 3)
## +     names(sal) <- c("MAE", "RMSE", "RELE")
## +     me .... [TRUNCATED] 
## 
## > regression_Training <- function(inputsTrain, targetsTrain, 
## +     dataset) {
## +     resultList = list()
## +     cvcontrol <- trainControl(method = "cv" .... [TRUNCATED]
source('~/functions/calculateErrors.R', echo=FALSE)
# Load the prepared data, the per-model class predictions and the linear
# ensemble classifier for dataset 14 (presumably these files provide
# inputsTest, targetsTestReg, targetsTestClass, predsTestClass and linear --
# confirm against the scripts that wrote them).
load("~/PED/classification/dataset_ 14 _prepared.RData")
load("~/PED/classification/dataset_ 14 predsTestClass.RData")
load("~/PED/classification/dataset_ 14 _linear.RData")

# Split the test set by the level predicted by the linear ensemble and cache
# the four resulting objects on disk. The cache file keeps its historical
# spelling ("classfication") so that existing caches are still found.
if (file.exists("test_classfication_prediction.RData")) {
  load("test_classfication_prediction.RData")
} else {
  pred <- cbind(predsTestClass, targetsTestClass)
  linear_pred <- predict(linear, newdata = inputsTest)
  # Values are only auto-printed at top level; inside this braced branch the
  # confusion table has to be printed explicitly to show up in the report.
  print(table(targetsTestClass, linear_pred))
  save(linear_pred, file = "linear_pred.RData")
  test_after_class <- data.frame(
    inputsTest,
    targetsTestReg = targetsTestReg,
    targetsTestClass = targetsTestClass,
    linear_pred
  )
  # test_after_class$linear_pred_rev<-sample(c("h","l"),nrow(test_after_class),replace=TRUE)
  # test_after_class[test_after_class[,"linear_pred"]=="h","linear_pred_rev"]<-"l"
  # test_after_class[test_after_class[,"linear_pred"]=="l","linear_pred_rev"]<-"h"

  ### prepare data for regression based on different predict levels
  # Derive the column positions from inputsTest instead of hard-coding 1:21
  # and 22: the inputs come first in test_after_class and the regression
  # target is the column right after them.
  input_cols <- seq_len(ncol(inputsTest))
  target_col <- length(input_cols) + 1L

  test_pred_split <- split(test_after_class, f = test_after_class$linear_pred)
  # split() orders the groups by factor level (sorted order otherwise), so the
  # first group is taken as "high" and the second as "low". Fail early with a
  # clear message if the classifier did not produce exactly two groups.
  stopifnot(length(test_pred_split) == 2L)

  test_pred_high <- test_pred_split[[1]]
  inputsTest_high <- test_pred_high[, input_cols, drop = FALSE]
  targetsTest_high <- test_pred_high[, target_col]

  test_pred_low <- test_pred_split[[2]]
  inputsTest_low <- test_pred_low[, input_cols, drop = FALSE]
  targetsTest_low <- test_pred_low[, target_col]

  save(
    inputsTest_high, targetsTest_high, inputsTest_low, targetsTest_low,
    file = "test_classfication_prediction.RData"
  )
}

First, we use the general regression models to predict the whole test data set.

# Collect the general regression fits for dataset 14 into one named list
# (models.GM). The list is cached in models.GM.RData; the individual fit
# files are only loaded when that cache does not exist yet.
if (!file.exists("models.GM.RData")) {
  gm_fit_files <- c(
    "~/PED/regression/dataset_ _14_ _bagTreeFit.RData",
    "~/PED/regression/dataset_ _14_ _gbmFit.RData",
    "~/PED/regression/dataset_ _14_ _greedyFit.RData",
    "~/PED/regression/dataset_ _14_ _linearFit.RData",
    "~/PED/regression/dataset_ _14_ _lmFit.RData",
    "~/PED/regression/dataset_ _14_ _nnetFit.RData",
    "~/PED/regression/dataset_ _14_ _rfFit.RData",
    "~/PED/regression/dataset_ _14_ _rpartFit.RData",
    "~/PED/regression/dataset_ _14_ _svmFit.RData"
  )
  # Same load order as before, so any object stored in more than one file
  # ends up with the same final value.
  for (fit_file in gm_fit_files) {
    load(fit_file)
  }
  models <- list(
    lmFit = lmFit,
    nnetFit = nnetFit,
    rfFit = rfFit,
    gbmFit = gbmFit,
    rpartFit = rpartFit,
    svmFit = svmFit,
    bagTreeFit = bagTreeFit,
    linearFit = linearFit,
    greedyFit = greedyFit
  )
  models.GM <- models
  save(models.GM, file = "models.GM.RData")
} else {
  load("models.GM.RData")
}

What if we use the general model to predict the whole test data set? Black is the original data, red is the prediction by the linear ensemble, blue is the prediction by nnet, and yellow is the prediction by the nnet loaded from regressionWithBalanceData (30 iterations).

# Predict the whole test set with the general linear ensemble and the general
# nnet, then with the nnet stored in the 30-iteration file, and overlay all
# three predictions on the observed regression targets.
load("~/PED/regression/dataset_ _14_ _linearFit.RData")
load("~/PED/regression/dataset_ _14_ _nnetFit.RData")
linear_pred_total <- predict(linearFit, newdata = inputsTest)
## Loading required package: nnet
## Loading required package: randomForest
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
## Loading required package: rpart
## Loading required package: plyr
## Loading required package: ipred
nnetFit_pred_total <- predict(nnetFit, newdata = inputsTest)

# Presumably this file replaces `nnetFit` with the 30-iteration model.
load("~/PED/regressionWithBalanceData/nnetFit_ _30_iter .RData")
nnetFit_pred_total_boosting <- predict(nnetFit, newdata = inputsTest)

# Black: observed targets; red: linear ensemble; blue: general nnet;
# yellow: nnet from the 30-iteration file.
plot(targetsTestReg, type = "l")
lines(linear_pred_total, col = "red")
lines(nnetFit_pred_total, col = "blue")
lines(nnetFit_pred_total_boosting, col = "yellow")

plot of chunk .3

Errors when the general models predict the whole test data set

# Evaluate every general model on the whole test set. allModelErrors() is
# defined elsewhere; presumably it writes the modelsErrorsTotal file that is
# loaded on the next line -- confirm against its definition.
allModelErrors(models.GM, inputsTest, targetsTestReg, "_14_")
load("~/PED/classification-regression/dataset_ _14_ _modelsErrorsTotal.RData")
error14 <- modelsErrorsTotal
error14
##        lmFit nnetFit   rfFit  gbmFit rpartFit  svmFit bagTreeFit linearFit
## MAE  0.07380 0.07145 0.07341 0.07593   0.0829 0.06992    0.07605    0.0709
## RMSE 0.09519 0.09304 0.09581 0.09811   0.1064 0.09271    0.09896    0.0927
## RELE 0.24320 0.23152 0.24351 0.25133   0.2810 0.21923    0.25204    0.2293
##      greedyFit
## MAE    0.07098
## RMSE   0.09289
## RELE   0.22881
# One digits entry per model column plus one for the row-name column.
xtable(
  error14,
  digits = rep(4, ncol(error14) + 1),
  caption = "regression training model predict all the test data set"
)
## % latex table generated in R 3.1.2 by xtable 1.7-1 package
## % Tue Dec 23 18:24:22 2014
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrrrrrr}
##   \hline
##  & lmFit & nnetFit & rfFit & gbmFit & rpartFit & svmFit & bagTreeFit & linearFit & greedyFit \\ 
##   \hline
## MAE & 0.0738 & 0.0715 & 0.0734 & 0.0759 & 0.0829 & 0.0699 & 0.0760 & 0.0709 & 0.0710 \\ 
##   RMSE & 0.0952 & 0.0930 & 0.0958 & 0.0981 & 0.1064 & 0.0927 & 0.0990 & 0.0927 & 0.0929 \\ 
##   RELE & 0.2432 & 0.2315 & 0.2435 & 0.2513 & 0.2810 & 0.2192 & 0.2520 & 0.2293 & 0.2288 \\ 
##    \hline
## \end{tabular}
## \caption{regression training model predict all the test data set} 
## \end{table}

Errors when the general models predict the test_pred_high data set

# Evaluate every general model on the subset of the test set that the
# classifier assigned to the high level. allModelErrors() is defined
# elsewhere; presumably it writes the file loaded on the next line.
allModelErrors(models.GM, inputsTest_high, targetsTest_high, "_14_high")
load("~/PED/classification-regression/dataset_ _14_high _modelsErrorsTotal.RData")
error_test_pred_high14 <- modelsErrorsTotal
error_test_pred_high14
##        lmFit nnetFit   rfFit  gbmFit rpartFit  svmFit bagTreeFit linearFit
## MAE  0.07730 0.07646 0.07881 0.07981  0.08693 0.07586    0.08041   0.07648
## RMSE 0.09923 0.09873 0.10164 0.10377  0.11231 0.09843    0.10407   0.09879
## RELE 0.22477 0.22222 0.23356 0.23172  0.25062 0.21675    0.23556   0.22350
##      greedyFit
## MAE    0.07624
## RMSE   0.09867
## RELE   0.22065
# One digits entry per model column plus one for the row-name column.
xtable(
  error_test_pred_high14,
  digits = rep(4, ncol(error_test_pred_high14) + 1),
  caption = "regression model predict test_pred_high level dataset"
)
## % latex table generated in R 3.1.2 by xtable 1.7-1 package
## % Tue Dec 23 18:24:22 2014
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrrrrrr}
##   \hline
##  & lmFit & nnetFit & rfFit & gbmFit & rpartFit & svmFit & bagTreeFit & linearFit & greedyFit \\ 
##   \hline
## MAE & 0.0773 & 0.0765 & 0.0788 & 0.0798 & 0.0869 & 0.0759 & 0.0804 & 0.0765 & 0.0762 \\ 
##   RMSE & 0.0992 & 0.0987 & 0.1016 & 0.1038 & 0.1123 & 0.0984 & 0.1041 & 0.0988 & 0.0987 \\ 
##   RELE & 0.2248 & 0.2222 & 0.2336 & 0.2317 & 0.2506 & 0.2168 & 0.2356 & 0.2235 & 0.2207 \\ 
##    \hline
## \end{tabular}
## \caption{regression model predict test_pred_high level dataset} 
## \end{table}

Errors when the general models predict the test_pred_low data set

# Evaluate every general model on the subset of the test set that the
# classifier assigned to the low level. allModelErrors() is defined
# elsewhere; presumably it writes the file loaded on the next line.
allModelErrors(models.GM, inputsTest_low, targetsTest_low, "_14_low")
load("~/PED/classification-regression/dataset_ _14_low _modelsErrorsTotal.RData")
error_test_pred_low14 <- modelsErrorsTotal
error_test_pred_low14
##        lmFit nnetFit   rfFit  gbmFit rpartFit  svmFit bagTreeFit linearFit
## MAE  0.06722 0.06205 0.06325 0.06863  0.07531 0.05878    0.06785   0.06041
## RMSE 0.08711 0.08130 0.08378 0.08647  0.09445 0.08088    0.08857   0.08001
## RELE 0.27783 0.24900 0.26221 0.28817  0.33805 0.22389    0.28300   0.24020
##      greedyFit
## MAE    0.06111
## RMSE   0.08090
## RELE   0.24415
# One digits entry per model column plus one for the row-name column.
xtable(
  error_test_pred_low14,
  digits = rep(4, ncol(error_test_pred_low14) + 1),
  caption = "regression model predict test_pred_low level dataset"
)
## % latex table generated in R 3.1.2 by xtable 1.7-1 package
## % Tue Dec 23 18:24:23 2014
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrrrrrr}
##   \hline
##  & lmFit & nnetFit & rfFit & gbmFit & rpartFit & svmFit & bagTreeFit & linearFit & greedyFit \\ 
##   \hline
## MAE & 0.0672 & 0.0620 & 0.0632 & 0.0686 & 0.0753 & 0.0588 & 0.0678 & 0.0604 & 0.0611 \\ 
##   RMSE & 0.0871 & 0.0813 & 0.0838 & 0.0865 & 0.0944 & 0.0809 & 0.0886 & 0.0800 & 0.0809 \\ 
##   RELE & 0.2778 & 0.2490 & 0.2622 & 0.2882 & 0.3380 & 0.2239 & 0.2830 & 0.2402 & 0.2441 \\ 
##    \hline
## \end{tabular}
## \caption{regression model predict test_pred_low level dataset} 
## \end{table}

Use the high-level regression models to predict the test_pred_high data set

# Collect the regression fits stored under the "14_H" prefix into one named
# list (models.HM). The list is cached in models.HM.RData; the individual fit
# files are only loaded when that cache does not exist yet.
if (!file.exists("models.HM.RData")) {
  hm_fit_files <- c(
    "~/PED/regression/dataset_ 14_H _bagTreeFit.RData",
    "~/PED/regression/dataset_ 14_H _gbmFit.RData",
    "~/PED/regression/dataset_ 14_H _greedyFit.RData",
    "~/PED/regression/dataset_ 14_H _linearFit.RData",
    "~/PED/regression/dataset_ 14_H _lmFit.RData",
    "~/PED/regression/dataset_ 14_H _nnetFit.RData",
    "~/PED/regression/dataset_ 14_H _rfFit.RData",
    "~/PED/regression/dataset_ 14_H _rpartFit.RData",
    "~/PED/regression/dataset_ 14_H _svmFit.RData"
  )
  # Same load order as before, so any object stored in more than one file
  # ends up with the same final value.
  for (fit_file in hm_fit_files) {
    load(fit_file)
  }
  models <- list(
    lmFit = lmFit,
    nnetFit = nnetFit,
    rfFit = rfFit,
    gbmFit = gbmFit,
    rpartFit = rpartFit,
    svmFit = svmFit,
    bagTreeFit = bagTreeFit,
    linearFit = linearFit,
    greedyFit = greedyFit
  )
  models.HM <- models
  save(models.HM, file = "models.HM.RData")
} else {
  load("models.HM.RData")
}

Errors when the high-level regression models predict the test_pred_high data set

# Evaluate every high-level model on the subset of the test set that the
# classifier assigned to the high level. allModelErrors() is defined
# elsewhere; presumably it writes the file loaded on the next line.
allModelErrors(models.HM, inputsTest_high, targetsTest_high, "_14_H")
load("~/PED/classification-regression/dataset_ _14_H _modelsErrorsTotal.RData")
error14H <- modelsErrorsTotal
error14H
##        lmFit nnetFit   rfFit  gbmFit rpartFit  svmFit bagTreeFit linearFit
## MAE  0.08815 0.08857 0.08817 0.08921  0.09179 0.08313    0.08798   0.08763
## RMSE 0.11041 0.11098 0.11059 0.11143  0.11421 0.10525    0.11015   0.10982
## RELE 0.27901 0.28096 0.28044 0.28340  0.28775 0.25673    0.27821   0.27810
##      greedyFit
## MAE    0.08669
## RMSE   0.10881
## RELE   0.27381
# NOTE(review): the caption says "all the test data set", but the errors above
# are computed on inputsTest_high / targetsTest_high only -- consider rewording
# the caption (and regenerating the table) so it names the high-level subset.
xtable(
  error14H,
  digits = rep(4, ncol(error14H) + 1),
  caption = "high regression training model predict all the test data set"
)
## % latex table generated in R 3.1.2 by xtable 1.7-1 package
## % Tue Dec 23 18:24:38 2014
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrrrrrr}
##   \hline
##  & lmFit & nnetFit & rfFit & gbmFit & rpartFit & svmFit & bagTreeFit & linearFit & greedyFit \\ 
##   \hline
## MAE & 0.0882 & 0.0886 & 0.0882 & 0.0892 & 0.0918 & 0.0831 & 0.0880 & 0.0876 & 0.0867 \\ 
##   RMSE & 0.1104 & 0.1110 & 0.1106 & 0.1114 & 0.1142 & 0.1053 & 0.1102 & 0.1098 & 0.1088 \\ 
##   RELE & 0.2790 & 0.2810 & 0.2804 & 0.2834 & 0.2877 & 0.2567 & 0.2782 & 0.2781 & 0.2738 \\ 
##    \hline
## \end{tabular}
## \caption{high regression training model predict all the test data set} 
## \end{table}

Plot: the black line is the original data, the red line is the prediction by the high-level linearFit, and the blue line is the prediction by the general linearFit.

# Compare predictions on the predicted-high test subset. Each load() replaces
# the object that the following predict() call uses, so the load/predict
# pairs must stay in this order.
load("~/PED/regression/dataset_ 14_H _linearFit.RData")
linear_high_pred <- predict(linearFit, newdata = inputsTest_high)
load("~/PED/regression/dataset_ _14_ _linearFit.RData")
linear_pred2 <- predict(linearFit, newdata = inputsTest_high)
load("~/PED/regressionWithBalanceData/nnetFit_ H_30_iter .RData")
nnetFit_pred_boosting_H <- predict(nnetFit, newdata = inputsTest_high)
load("~/PED/regressionWithBalanceData/nnetFit_ _30_iter .RData")
nnetFit_pred_boosting <- predict(nnetFit, newdata = inputsTest_high)

# Black: observed targets; red: high-level linearFit; blue: general linearFit;
# pink: nnet from the "H_30_iter" file; yellow: nnet from the "_30_iter" file.
plot(targetsTest_high, type = "l")
lines(linear_high_pred, col = "red")
lines(linear_pred2, col = "blue")
# Fix: the pink line previously re-drew nnetFit_pred_boosting, so the
# H_30_iter prediction was computed but never shown.
lines(nnetFit_pred_boosting_H, col = "pink")
lines(nnetFit_pred_boosting, col = "yellow")

plot of chunk .9

Use the low-level regression models to predict the test_pred_low data set

# Collect the regression fits stored under the "14_L" prefix into one named
# list (models.LM). The list is cached in models.LM.RData; the individual fit
# files are only loaded when that cache does not exist yet.
if (!file.exists("models.LM.RData")) {
  lm_fit_files <- c(
    "~/PED/regression/dataset_ 14_L _bagTreeFit.RData",
    "~/PED/regression/dataset_ 14_L _gbmFit.RData",
    "~/PED/regression/dataset_ 14_L _greedyFit.RData",
    "~/PED/regression/dataset_ 14_L _linearFit.RData",
    "~/PED/regression/dataset_ 14_L _lmFit.RData",
    "~/PED/regression/dataset_ 14_L _nnetFit.RData",
    "~/PED/regression/dataset_ 14_L _rfFit.RData",
    "~/PED/regression/dataset_ 14_L _rpartFit.RData",
    "~/PED/regression/dataset_ 14_L _svmFit.RData"
  )
  # Same load order as before, so any object stored in more than one file
  # ends up with the same final value.
  for (fit_file in lm_fit_files) {
    load(fit_file)
  }
  models <- list(
    lmFit = lmFit,
    nnetFit = nnetFit,
    rfFit = rfFit,
    gbmFit = gbmFit,
    rpartFit = rpartFit,
    svmFit = svmFit,
    bagTreeFit = bagTreeFit,
    linearFit = linearFit,
    greedyFit = greedyFit
  )
  models.LM <- models
  save(models.LM, file = "models.LM.RData")
} else {
  load("models.LM.RData")
}

Errors when the low-level regression models predict the test_pred_low data set

# Evaluate every low-level model on the subset of the test set that the
# classifier assigned to the low level. allModelErrors() is defined
# elsewhere; presumably it writes the file loaded on the next line.
allModelErrors(models.LM, inputsTest_low, targetsTest_low, "_14_L")
load("~/PED/classification-regression/dataset_ _14_L _modelsErrorsTotal.RData")
error14L <- modelsErrorsTotal
error14L
##        lmFit nnetFit   rfFit  gbmFit rpartFit  svmFit bagTreeFit linearFit
## MAE  0.06537 0.06415 0.06384 0.06596  0.06741 0.06411    0.06505    0.0637
## RMSE 0.09403 0.09248 0.09242 0.09393  0.09503 0.09409    0.09446    0.0927
## RELE 0.22047 0.21571 0.21520 0.22700  0.23477 0.21298    0.21996    0.2137
##      greedyFit
## MAE    0.06407
## RMSE   0.09273
## RELE   0.21586
# One digits entry per model column plus one for the row-name column.
xtable(
  error14L,
  digits = rep(4, ncol(error14L) + 1),
  caption = "low regression training model predict low predict test data set"
)
## % latex table generated in R 3.1.2 by xtable 1.7-1 package
## % Tue Dec 23 18:24:55 2014
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrrrrrr}
##   \hline
##  & lmFit & nnetFit & rfFit & gbmFit & rpartFit & svmFit & bagTreeFit & linearFit & greedyFit \\ 
##   \hline
## MAE & 0.0654 & 0.0641 & 0.0638 & 0.0660 & 0.0674 & 0.0641 & 0.0651 & 0.0637 & 0.0641 \\ 
##   RMSE & 0.0940 & 0.0925 & 0.0924 & 0.0939 & 0.0950 & 0.0941 & 0.0945 & 0.0927 & 0.0927 \\ 
##   RELE & 0.2205 & 0.2157 & 0.2152 & 0.2270 & 0.2348 & 0.2130 & 0.2200 & 0.2137 & 0.2159 \\ 
##    \hline
## \end{tabular}
## \caption{low regression training model predict low predict test data set} 
## \end{table}

Black is the original test_pred_low data, red is the prediction by the low-level linearFit model, and blue is the prediction by the general linearFit model.

# Compare predictions on the predicted-low test subset. Each load() replaces
# the object that the following predict() call uses, so the load/predict
# pairs must stay in this order.
load("~/PED/regression/dataset_ 14_L _linearFit.RData")
linear_low_pred <- predict(linearFit, newdata = inputsTest_low)
load("~/PED/regression/dataset_ _14_ _linearFit.RData")
# NOTE: this reuses the name `linear_pred`, which the classification step also
# assigns; the name is kept so downstream code sees the same variables.
linear_pred <- predict(linearFit, newdata = inputsTest_low)
load("~/PED/regressionWithBalanceData/nnetFit_ L_30_iter .RData")
# Fix: predict on the low subset. The original used inputsTest_high here, so
# the yellow line was drawn for different observations than targetsTest_low.
nnetFit_pred_boosting_L <- predict(nnetFit, newdata = inputsTest_low)

# Black: observed targets; red: low-level linearFit; blue: general linearFit;
# yellow: nnet from the "L_30_iter" file.
plot(targetsTest_low, type = "l")
lines(linear_low_pred, col = "red")
lines(linear_pred, col = "blue")
lines(nnetFit_pred_boosting_L, col = "yellow")

plot of chunk .12