Now I want to separate the dataset into two parts: one with a high level of ozone, where targetsTrain is “h”, and another with a low level of ozone, where targetsTrain is “l”. Then I build two regression models, one on each of these two data sets.
##
## > library(doParallel)
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
##
## > library(survival)
## Loading required package: splines
##
## > library(splines)
##
## > library(lattice)
##
## > library(gbm)
## Loaded gbm 2.1
##
## > library(methods)
##
## > library(kernlab)
##
## > library(MASS)
##
## > library(caret)
## Loading required package: ggplot2
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:survival':
##
## cluster
##
## > library(ggplot2)
##
## > library(corrplot)
##
## > library(pbapply)
##
## > library(testthat)
##
## > library(devtools)
##
## > library(caretEnsemble)
## Loading required package: caTools
##
## > library(doMC)
##
## > library(foreach)
##
## > registerDoMC(cores = 5)
##
## > denormalized <- function(y, output) {
## + ((y - 0.1) * (max(output) - min(output))/0.8) + min(output)
## + }
##
## > modelErrors <- function(predicted, actual) {
## + sal <- vector(mode = "numeric", length = 3)
## + names(sal) <- c("MAE", "RMSE", "RELE")
## + me .... [TRUNCATED]
##
## > regression_Training <- function(inputsTrain, targetsTrain,
## + dataset) {
## + resultList = list()
## + cvcontrol <- trainControl(method = "cv" .... [TRUNCATED]
## Loading required package: nnet
## Loading required package: rpart
## Loading required package: randomForest
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
## Loading required package: plyr
## Loading required package: ada
## Loading required package: ipred
## linear_pred
## targetsTestClass h l
## h 464 93
## l 135 235
First, we used the trained regression model to predict the entire test data set.
## Predictions being made only for cases with complete data
## lmFit nnetFit rfFit rpartFit svmFit bagTreeFit linearFit
## MAE 0.07380 0.07123 0.07290 0.0829 0.06994 0.07545 0.07011
## RMSE 0.09519 0.09257 0.09543 0.1064 0.09204 0.09833 0.09182
## RELE 0.24320 0.23055 0.24029 0.2810 0.21980 0.24961 0.22727
## greedyFit gbm_ntrees_ 1 gbm_ntrees_ 2 gbm_ntrees_ 3 gbm_ntrees_ 4
## MAE 0.06996 0.08984 0.08506 0.08193 0.07991
## RMSE 0.09176 0.11253 0.10708 0.10371 0.10175
## RELE 0.22543 0.30752 0.29012 0.27804 0.27041
## gbm_ntrees_ 5
## MAE 0.07811
## RMSE 0.10012
## RELE 0.26338
Next, we use the high-level regression model to predict the part of the test dataset that was predicted as high level.
## Predictions being made only for cases with complete data
## lmFit nnetFit rfFit rpartFit svmFit bagTreeFit linearFit
## MAE 0.08758 0.08803 0.0882 0.09169 0.08289 0.08682 0.08688
## RMSE 0.10955 0.11017 0.1105 0.11407 0.10483 0.10875 0.10870
## RELE 0.27455 0.27657 0.2780 0.28532 0.25362 0.27313 0.27341
## greedyFit gbm_ntrees_ 1 gbm_ntrees_ 2 gbm_ntrees_ 3 gbm_ntrees_ 4
## MAE 0.08668 0.09468 0.09241 0.09136 0.09026
## RMSE 0.10846 0.11608 0.11361 0.11268 0.11163
## RELE 0.27200 0.29376 0.28717 0.28522 0.28218
## gbm_ntrees_ 5
## MAE 0.09031
## RMSE 0.11162
## RELE 0.28254
Finally, we use the low-level regression model to predict the part of the test dataset that was predicted as low level.
## Predictions being made only for cases with complete data
## lmFit nnetFit rfFit rpartFit svmFit bagTreeFit linearFit
## MAE 0.06538 0.06348 0.06423 0.06800 0.06429 0.06445 0.06382
## RMSE 0.09372 0.09152 0.09255 0.09503 0.09388 0.09298 0.09228
## RELE 0.22139 0.21337 0.21714 0.23778 0.21453 0.21933 0.21401
## greedyFit gbm_ntrees_ 1 gbm_ntrees_ 2 gbm_ntrees_ 3 gbm_ntrees_ 4
## MAE 0.06396 0.07094 0.06953 0.06833 0.06786
## RMSE 0.09231 0.09472 0.09428 0.09365 0.09335
## RELE 0.21579 0.25741 0.24962 0.24261 0.24019
## gbm_ntrees_ 5
## MAE 0.06747
## RMSE 0.09340
## RELE 0.23796