Data sets

# Run the ensemble-classification helper script. With echo = TRUE every
# sourced expression is printed as it is evaluated; the transcript that
# follows shows it defining normalized() and class_ensemble_function() and
# loading caret, caretEnsemble and doMC.
source(
  "~/PED/classification/functoinForEnsembleClassification.R",
  echo = TRUE
)
## 
## > normalized <- function(x) {
## +     (x - min(x)) * 0.8/(max(x) - min(x)) + 0.1
## + }
## 
## > library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
## 
## > library(devtools)
## 
## > install_github("caretEnsemble", "zachmayer")
## Installing github repo(s) caretEnsemble/master from zachmayer
## Installing caretEnsemble.zip from https://github.com/zachmayer/caretEnsemble/archive/master.zip
## Installing caretEnsemble
## '/usr/lib/R/bin/R' --vanilla CMD INSTALL  \
##   '/tmp/RtmpJzRSrf/caretEnsemble-master'  \
##   --library='/home/gong/R/x86_64-pc-linux-gnu-library/3.1'  \
##   --with-keep.source
## 
## > library(caretEnsemble)
## Loading required package: caTools
## 
## > library(doMC)
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
## 
## > library(foreach)
## 
## > registerDoMC(cores = 5)
## 
## > class_ensemble_function <- function(inputsTrain, targetsTrain, 
## +     inputsTest, targetsTest, dataset) {
## +     folds = 5
## +     repeats = 1
## +     my .... [TRUNCATED]
# Load the prepared daily feature table. The code below expects this file to
# define a data frame `feature_new2` with a "MAXO3C" column as its target --
# TODO confirm against the script that wrote the .RData file.
load("~/PED/prepareDataDay/feature_new2.RData")
# Class balance: rows at or below, then above, the 0.11 threshold.
nrow(feature_new2[feature_new2[, "MAXO3C"] <= 0.11, ])
## [1] 1113
nrow(feature_new2[feature_new2[, "MAXO3C"] > 0.11, ])
## [1] 1768
# Recode the output as a two-level label: "l" means the daily maximum ozone
# level is less than or equal to 0.11 ppm, "h" means it is higher.
# The recode is a single vectorised step on the numeric values, so the
# threshold is never compared against an already string-coerced column.
feature_new2[, "MAXO3C"] <- ifelse(feature_new2[, "MAXO3C"] <= 0.11, "l", "h")
# Rescale every predictor column with normalized() (defined in the sourced
# helper script; maps each column onto [0.1, 0.9]). The target column is
# excluded by name instead of by position.
feature_new_norm2 <- apply(
  feature_new2[, colnames(feature_new2) != "MAXO3C"],
  MARGIN = 2,
  FUN = normalized
)
# Fix the RNG seed so the 66 % / 34 % split is identical on every run (the
# seed value itself is arbitrary).
# NOTE(review): prediction files cached from an earlier, unseeded run will not
# match this split and presumably need to be regenerated once.
set.seed(2014)
indexTrainAndValidation2 <- sample(
  seq_len(nrow(feature_new_norm2)),
  floor(nrow(feature_new_norm2) * 0.66)
)
# drop = FALSE keeps the inputs as matrices even for a one-row subset.
inputsTrain <- feature_new_norm2[indexTrainAndValidation2, , drop = FALSE]
inputsTest <- feature_new_norm2[-indexTrainAndValidation2, , drop = FALSE]
# Explicit levels keep the train and test factors aligned even if one class
# happens to be missing from a subset.
targetsTrain <- factor(
  feature_new2[indexTrainAndValidation2, "MAXO3C"],
  levels = c("h", "l")
)
targetsTest <- factor(
  feature_new2[-indexTrainAndValidation2, "MAXO3C"],
  levels = c("h", "l")
)

Train models

Show the results

### Train set
# Load the cached training-set predictions (presumably defines `predsTrain`;
# confirm against the code that saved the file).
# NOTE: paste() uses its default sep = " ", so the resulting file name
# contains spaces; it is left unchanged so it keeps matching the file on disk.
load(paste("dataset_",2,"predsTrain.RData"))
# Compute the per-model AUC once, drawing the ROC curves in the same call,
# instead of evaluating colAUC() twice.
resultsTrain <- data.frame(colAUC(predsTrain, targetsTrain, plotROC = TRUE))
# Order the model columns by ascending AUC (first row) explicitly. Calling
# sort() on a data frame relies on xtfrm() of a data frame, which recent R
# versions warn about.
resultsTrain <- resultsTrain[, order(unlist(resultsTrain[1, ])), drop = FALSE]
resultsTrain

plot of chunk unnamed-chunk-3

##          rpart svmRadial    ada   nnet ENS_greedy ENS_linear treebag rf
## h vs. l 0.7898    0.8242 0.8272 0.8469     0.8867     0.9032  0.9532  1
# Print the training-set AUC table stored in resultsTrain.
resultsTrain
##          rpart svmRadial    ada   nnet ENS_greedy ENS_linear treebag rf
## h vs. l 0.7898    0.8242 0.8272 0.8469     0.8867     0.9032  0.9532  1
### Test set
# Load the cached test-set predictions (presumably defines `predsTest`;
# confirm against the code that saved the file).
# NOTE: paste() uses its default sep = " ", so the resulting file name
# contains spaces; it is left unchanged so it keeps matching the file on disk.
load(paste("dataset_",2,"predsTest.RData"))
# Compute the per-model AUC once, drawing the ROC curves in the same call,
# instead of evaluating colAUC() twice.
resultsTest <- data.frame(colAUC(predsTest, targetsTest, plotROC = TRUE))
# Order the model columns by ascending AUC (first row) explicitly. Calling
# sort() on a data frame relies on xtfrm() of a data frame, which recent R
# versions warn about.
resultsTest <- resultsTest[, order(unlist(resultsTest[1, ])), drop = FALSE]
resultsTest

plot of chunk unnamed-chunk-3

##          rpart    rf svmRadial   ada   nnet ENS_greedy ENS_linear treebag
## h vs. l 0.7934 0.812    0.8222 0.825 0.8437     0.8867     0.9033  0.9664
# Print the test-set AUC table stored in resultsTest.
resultsTest
##          rpart    rf svmRadial   ada   nnet ENS_greedy ENS_linear treebag
## h vs. l 0.7934 0.812    0.8222 0.825 0.8437     0.8867     0.9033  0.9664