Data sets
# Load the ensemble-classification helpers. As the echoed listing below shows,
# sourcing this file defines normalized() and class_ensemble_function(),
# attaches caret / devtools / caretEnsemble / doMC / foreach, reinstalls
# caretEnsemble from GitHub and registers 5 parallel cores.
source(
  file = "~/PED/classification/functoinForEnsembleClassification.R",
  echo = TRUE
)
##
## > normalized <- function(x) {
## + (x - min(x)) * 0.8/(max(x) - min(x)) + 0.1
## + }
##
## > library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
##
## > library(devtools)
##
## > install_github("caretEnsemble", "zachmayer")
## Installing github repo(s) caretEnsemble/master from zachmayer
## Installing caretEnsemble.zip from https://github.com/zachmayer/caretEnsemble/archive/master.zip
## Installing caretEnsemble
## '/usr/lib/R/bin/R' --vanilla CMD INSTALL \
## '/tmp/RtmpJzRSrf/caretEnsemble-master' \
## --library='/home/gong/R/x86_64-pc-linux-gnu-library/3.1' \
## --with-keep.source
##
## > library(caretEnsemble)
## Loading required package: caTools
##
## > library(doMC)
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
##
## > library(foreach)
##
## > registerDoMC(cores = 5)
##
## > class_ensemble_function <- function(inputsTrain, targetsTrain,
## + inputsTest, targetsTest, dataset) {
## + folds = 5
## + repeats = 1
## + my .... [TRUNCATED]
# Load the prepared daily feature table; the code below expects it to provide
# the object `feature_new2` with a numeric "MAXO3C" column.
load("~/PED/prepareDataDay/feature_new2.RData")

# Class balance around the 0.11 threshold on MAXO3C (missing values, if any,
# are not counted in either class).
sum(feature_new2[, "MAXO3C"] <= 0.11, na.rm = TRUE)
## [1] 1113
sum(feature_new2[, "MAXO3C"] > 0.11, na.rm = TRUE)
## [1] 1768

# Recode the output as a two-class label: "l" when MAXO3C is less than or
# equal to 0.11, "h" when it is higher. A single vectorised ifelse() replaces
# the former two-step overwrite, which relied on coercing the column to
# character midway and failed on NA values.
feature_new2[, "MAXO3C"] <- ifelse(feature_new2[, "MAXO3C"] <= 0.11, "l", "h")

# Rescale every predictor column with normalized() (maps each column onto
# [0.1, 0.9], see the sourced helper file). The label column is excluded by
# name rather than by position. apply() returns a matrix.
# NOTE(review): min/max are taken over all rows, i.e. before the train/test
# split, so the test rows influence the scaling -- confirm this is intended.
is_target <- colnames(feature_new2) == "MAXO3C"
feature_new_norm2 <- apply(
  feature_new2[, !is_target, drop = FALSE],
  2,
  normalized
)

# Random 66 % / 34 % split. The seed value is arbitrary; it only makes the
# split repeatable between runs.
# NOTE(review): prediction files saved from an earlier, unseeded run will not
# correspond to this split and need to be regenerated.
set.seed(2014)
n_obs <- nrow(feature_new_norm2)
indexTrainAndValidation2 <- sample(seq_len(n_obs), floor(n_obs * 0.66))

inputsTrain <- feature_new_norm2[indexTrainAndValidation2, ]
inputsTest <- feature_new_norm2[-indexTrainAndValidation2, ]

# Fixed level order ("h", "l") matches what as.factor() produced before and
# keeps both levels present even if one subset happens to lack a class.
targetsTrain <- factor(
  feature_new2[indexTrainAndValidation2, "MAXO3C"],
  levels = c("h", "l")
)
targetsTest <- factor(
  feature_new2[-indexTrainAndValidation2, "MAXO3C"],
  levels = c("h", "l")
)
Train models
Show the results
### Train set
# Restore the saved training-set predictions; the file is expected to provide
# `predsTrain` (one column of predictions per model).
# paste() joins its arguments with a space, so the file actually read is
# "dataset_ 2 predsTrain.RData". Kept unchanged -- presumably this matches the
# name the file was saved under (TODO confirm).
load(paste("dataset_", 2, "predsTrain.RData"))

# Compute the per-model AUC once (drawing the ROC curves at the same time) and
# order the models by increasing AUC. Columns are ordered explicitly by the
# single "h vs. l" row instead of calling sort() on a data frame.
aucTrain <- colAUC(predsTrain, targetsTrain, plotROC = TRUE)
resultsTrain <- data.frame(aucTrain)[order(aucTrain[1, ], na.last = NA)]
resultsTrain
## rpart svmRadial ada nnet ENS_greedy ENS_linear treebag rf
## h vs. l 0.7898 0.8242 0.8272 0.8469 0.8867 0.9032 0.9532 1

### Test set
# Same procedure for the held-out rows; the file is expected to provide
# `predsTest`. The space-separated file name is kept for the same reason as
# above.
load(paste("dataset_", 2, "predsTest.RData"))
aucTest <- colAUC(predsTest, targetsTest, plotROC = TRUE)
resultsTest <- data.frame(aucTest)[order(aucTest[1, ], na.last = NA)]
resultsTest
## rpart rf svmRadial ada nnet ENS_greedy ENS_linear treebag
## h vs. l 0.7934 0.812 0.8222 0.825 0.8437 0.8867 0.9033 0.9664