classification models
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(xtable)
setwd("~/Documentos/HongKong/trainingModel/TW_previous2")
source('~/Documentos/HongKong/functions/classification.R', echo=TRUE)
##
## > normalized <- function(x) {
## + (x - min(x)) * 0.8/(max(x) - min(x)) + 0.1
## + }
##
## > library(caTools)
##
## > library(caret)
##
## > library(devtools)
##
## Attaching package: 'devtools'
##
## The following objects are masked from 'package:utils':
##
## ?, help
##
## The following object is masked from 'package:base':
##
## system.file
##
## > library(caretEnsemble)
##
## > library(doMC)
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
##
## > library(foreach)
##
## > library(randomForest)
## randomForest 4.6-7
## Type rfNews() to see new features/changes/bug fixes.
##
## > library(gbm)
## Loading required package: survival
## Loading required package: splines
##
## Attaching package: 'survival'
##
## The following object is masked from 'package:caret':
##
## cluster
##
## Loaded gbm 2.1
##
## > registerDoMC(cores = 3)
##
## > class_ensemble_function <- function(inputsTrain, targetsTrain,
## + dataset) {
## + folds = 3
## + repeats = 1
## + seeds = set.seed(1)
## + m .... [TRUNCATED]
##
## > classPrediction <- function(all.models, inputs, dataset) {
## + predsClass <- data.frame(sapply(all.models, function(x) {
## + predict(x, as.d .... [TRUNCATED]
source('~/Documentos/HongKong/functions/rocFunction.R', echo=TRUE)
##
## > library(caTools)
##
## > rocFun <- function(all.models, inputs, targets) {
## + preds <- data.frame(sapply(all.models, function(x) {
## + predict(x, inputs, type = "pr ..." ... [TRUNCATED]
source('~/Documentos/HongKong/functions/classificationModelErrors.R', echo=TRUE)
##
## > library(caTools)
##
## > classModelErrors <- function(predict, targets, beta) {
## + t <- table(targets, predict)
## + TP <- t[1]
## + FP <- t[2]
## + FN <- t[3]
## + T .... [TRUNCATED]
########################### original data set ###################################
# Load the partitioned data, then call the ensemble helper on the unmodified
# training set (presumably it fits the models and writes the per-model .RData
# files read below -- its body is not visible here, confirm).
load("~/Documentos/HongKong/trainingModel/data/partitionData_TW_previoud2Day.RData")
class_ensemble_function(
  inputsTrain_p2,
  as.factor(targetsTrainClass_p2),
  "_TW_previous_"
)
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
# Build the named model list from the per-model files unless a cached copy of
# the list already exists on disk.
if (!file.exists("all.models_previous.RData")) {
  load("dataset_ _TW_previous_ _svmFit.RData")
  load("dataset_ _TW_previous_ _nnetFit.RData")
  load("dataset_ _TW_previous_ _rpartFit.RData")
  load("dataset_ _TW_previous_ _gbmFit.RData")
  load("dataset_ _TW_previous_ _treebagFit.RData")
  load("dataset_ _TW_previous_ _greedy.RData")
  load("dataset_ _TW_previous_ _linear.RData")
  load("dataset_ _TW_previous_ _adaFit.RData")
  load("dataset_ _TW_previous_ _rfFit.RData")
  all.models_previous <- setNames(
    list(svmFit, nnetFit, rpartFit, rfFit, gbmFit, adaFit, treebagFit, linear),
    c("SVM", "NN", "rpart", "RF", "GBM", "ada", "bagging", "LE")
  )
  save(all.models_previous, file = "all.models_previous.RData")
} else {
  load("all.models_previous.RData")
}
### test sets #####
# Predict on the test inputs once and cache the result; later runs just load it.
if (!file.exists("modelsTest_previous.RData")) {
  modelsTest_previous <- classPrediction(
    all.models_previous,
    inputsTest_p2,
    "_TW_previous_test_"
  )
  save(modelsTest_previous, file = "modelsTest_previous.RData")
} else {
  load("modelsTest_previous.RData")
}
######################## ubUnder10 #########################
# Same pipeline as above, applied to the "ubUnder10" training set.
load("data_obUnder10.RData")
class_ensemble_function(
  inputsTrain_ubUnder10,
  as.factor(targetsTrain_ubUnder10),
  "_TW_ubUnder10_"
)
# Build the named model list from the per-model files unless a cached copy of
# the list already exists on disk.
if (!file.exists("all.models_ubUnder10.RData")) {
  # paste() uses its default " " separator, so the files read are named
  # "dataset_ _TW_ubUnder10_ _<model>.RData".
  load(paste("dataset_", "_TW_ubUnder10_", "_svmFit.RData"))
  load(paste("dataset_", "_TW_ubUnder10_", "_nnetFit.RData"))
  load(paste("dataset_", "_TW_ubUnder10_", "_rpartFit.RData"))
  load(paste("dataset_", "_TW_ubUnder10_", "_rfFit.RData"))
  load(paste("dataset_", "_TW_ubUnder10_", "_gbmFit.RData"))
  load(paste("dataset_", "_TW_ubUnder10_", "_adaFit.RData"))
  load(paste("dataset_", "_TW_ubUnder10_", "_treebagFit.RData"))
  load(paste("dataset_", "_TW_ubUnder10_", "_linear.RData"))
  all.models_ubUnder10 <- setNames(
    list(svmFit, nnetFit, rpartFit, rfFit, gbmFit, adaFit, treebagFit, linear),
    c("SVM", "NN", "rpart", "RF", "GBM", "ada", "bagging", "LE")
  )
  save(all.models_ubUnder10, file = "all.models_ubUnder10.RData")
} else {
  load("all.models_ubUnder10.RData")
}
### test sets #####
# Predict on the shared test inputs once and cache the result.
if (!file.exists("modelsTest_ubUnder10.RData")) {
  modelsTest_ubUnder10 <- classPrediction(
    all.models_ubUnder10,
    inputsTest_p2,
    "_TW_ubUnder10_test_"
  )
  save(modelsTest_ubUnder10, file = "modelsTest_ubUnder10.RData")
} else {
  load("modelsTest_ubUnder10.RData")
}
######################## ubUnder30 #########################
# Same pipeline as above, applied to the "ubUnder30" training set.
load("data_obUnder30.RData")
class_ensemble_function(
  inputsTrain_ubUnder30,
  as.factor(targetsTrain_ubUnder30),
  "_TW_ubUnder30_"
)
# Build the named model list from the per-model files unless a cached copy of
# the list already exists on disk.
if (!file.exists("all.models_ubUnder30.RData")) {
  # paste() uses its default " " separator, so the files read are named
  # "dataset_ _TW_ubUnder30_ _<model>.RData".
  load(paste("dataset_", "_TW_ubUnder30_", "_svmFit.RData"))
  load(paste("dataset_", "_TW_ubUnder30_", "_nnetFit.RData"))
  load(paste("dataset_", "_TW_ubUnder30_", "_rpartFit.RData"))
  load(paste("dataset_", "_TW_ubUnder30_", "_gbmFit.RData"))
  load(paste("dataset_", "_TW_ubUnder30_", "_adaFit.RData"))
  load(paste("dataset_", "_TW_ubUnder30_", "_treebagFit.RData"))
  load(paste("dataset_", "_TW_ubUnder30_", "_linear.RData"))
  load(paste("dataset_", "_TW_ubUnder30_", "_rfFit.RData"))
  all.models_ubUnder30 <- setNames(
    list(svmFit, nnetFit, rpartFit, rfFit, gbmFit, adaFit, treebagFit, linear),
    c("SVM", "NN", "rpart", "RF", "GBM", "ada", "bagging", "LE")
  )
  save(all.models_ubUnder30, file = "all.models_ubUnder30.RData")
} else {
  load("all.models_ubUnder30.RData")
}
### test sets #####
# Predict on the shared test inputs once and cache the result.
if (!file.exists("modelsTest_ubUnder30.RData")) {
  modelsTest_ubUnder30 <- classPrediction(
    all.models_ubUnder30,
    inputsTest_p2,
    "_TW_ubUnder30_test_"
  )
  save(modelsTest_ubUnder30, file = "modelsTest_ubUnder30.RData")
} else {
  load("modelsTest_ubUnder30.RData")
}
####################### over-sample #########################
# Same pipeline as above, applied to the over-sampled training set.
load("data_ubOver.RData")
# Renumber the rows 1..n. seq_len() is used instead of 1:nrow() so that a
# zero-row input yields an empty index rather than c(1, 0).
rownames(inputsTrain_ubOver) <- seq_len(nrow(inputsTrain_ubOver))
class_ensemble_function(
  inputsTrain_ubOver,
  as.factor(targetsTrain_ubOver),
  "_TW_ubOver_"
)
# Reuse the cached model list when present; otherwise rebuild it from the
# per-model files and cache it.
if (file.exists("all.models_ubOver.RData")) {
  load("all.models_ubOver.RData")
} else {
  # paste() uses its default " " separator, so the files read are named
  # "dataset_ _TW_ubOver_ _<model>.RData".
  load(paste("dataset_", "_TW_ubOver_", "_svmFit.RData"))
  load(paste("dataset_", "_TW_ubOver_", "_nnetFit.RData"))
  load(paste("dataset_", "_TW_ubOver_", "_rpartFit.RData"))
  load(paste("dataset_", "_TW_ubOver_", "_rfFit.RData"))
  load(paste("dataset_", "_TW_ubOver_", "_gbmFit.RData"))
  load(paste("dataset_", "_TW_ubOver_", "_adaFit.RData"))
  load(paste("dataset_", "_TW_ubOver_", "_treebagFit.RData"))
  load(paste("dataset_", "_TW_ubOver_", "_linear.RData"))
  all.models_ubOver <- list(
    svmFit, nnetFit, rpartFit, rfFit, gbmFit, adaFit, treebagFit, linear
  )
  names(all.models_ubOver) <- c(
    "SVM", "NN", "rpart", "RF", "GBM", "ada", "bagging", "LE"
  )
  save(all.models_ubOver, file = "all.models_ubOver.RData")
}
### test sets #####
# Predict on the shared test inputs once and cache the result.
if (file.exists("modelsTest_ubOver.RData")) {
  load("modelsTest_ubOver.RData")
} else {
  # NOTE(review): the earlier sections pass a "..._test_" label here; this one
  # reuses the training label "_TW_ubOver_". Left unchanged because
  # classPrediction's use of the label is not visible -- confirm it is intended.
  modelsTest_ubOver <- classPrediction(
    all.models_ubOver,
    inputsTest_p2,
    "_TW_ubOver_"
  )
  save(modelsTest_ubOver, file = "modelsTest_ubOver.RData")
}
############################### SMOTE ###################################
# Same pipeline as above, applied to the SMOTE training set.
load("data_SMOTE.RData")
class_ensemble_function(
  inputsTrain_SMOTE,
  as.factor(targetsTrain_SMOTE),
  "_TW_SMOTE_"
)
# Reuse the cached model list when present; otherwise rebuild it from the
# per-model files and cache it.
if (file.exists("all.models_SMOTE.RData")) {
  load("all.models_SMOTE.RData")
} else {
  # paste() uses its default " " separator, so the files read are named
  # "dataset_ _TW_SMOTE_ _<model>.RData".
  load(paste("dataset_", "_TW_SMOTE_", "_svmFit.RData"))
  load(paste("dataset_", "_TW_SMOTE_", "_nnetFit.RData"))
  load(paste("dataset_", "_TW_SMOTE_", "_rpartFit.RData"))
  load(paste("dataset_", "_TW_SMOTE_", "_rfFit.RData"))
  load(paste("dataset_", "_TW_SMOTE_", "_gbmFit.RData"))
  load(paste("dataset_", "_TW_SMOTE_", "_adaFit.RData"))
  load(paste("dataset_", "_TW_SMOTE_", "_treebagFit.RData"))
  load(paste("dataset_", "_TW_SMOTE_", "_linear.RData"))
  all.models_SMOTE <- list(
    svmFit, nnetFit, rpartFit, rfFit, gbmFit, adaFit, treebagFit, linear
  )
  names(all.models_SMOTE) <- c(
    "SVM", "NN", "rpart", "RF", "GBM", "ada", "bagging", "LE"
  )
  save(all.models_SMOTE, file = "all.models_SMOTE.RData")
}
### test sets #####
# The cache check and load must use the same file name that save() writes
# ("modelsTest_SMOTE.RData"). The previous check for "modelsTest_SMOTE" never
# matched, so the predictions were recomputed on every run.
if (file.exists("modelsTest_SMOTE.RData")) {
  load("modelsTest_SMOTE.RData")
} else {
  # NOTE(review): the earlier sections pass a "..._test_" label here; this one
  # reuses the training label "_TW_SMOTE_". Left unchanged because
  # classPrediction's use of the label is not visible -- confirm it is intended.
  modelsTest_SMOTE <- classPrediction(
    all.models_SMOTE,
    inputsTest_p2,
    "_TW_SMOTE_"
  )
  save(modelsTest_SMOTE, file = "modelsTest_SMOTE.RData")
}
## Loading required package: kernlab
#################### show all the errors ############################
# One column of error measures per model: each element of the prediction list
# is passed to classModelErrors() as `predict`, with the test targets and
# beta = 0.4 forwarded through sapply()'s `...`.
sapply(modelsTest_previous, classModelErrors,
       targets = targetsTestClass_p2, beta = 0.4)
## svmRadial nnet rpart rf gbm ada
## TNR 1.00000 NA NA 1.00000 0.99724 NA
## TPR 0.02941 NA NA 0.02941 0.02941 NA
## Precision 1.00000 0.02291 0.02291 1.00000 0.20000 0.02291
## Recall 0.02941 NA NA 0.02941 0.02941 NA
## G-mean 0.17150 NA NA 0.17150 0.17126 NA
## Weighted Accuracy 0.61176 NA NA 0.61176 0.61011 NA
## F-measure 0.05714 NA NA 0.05714 0.05128 NA
## AUC 0.51471 0.50000 0.50000 0.51471 0.51333 0.50000
## treebag NULL
## TNR 1.00000 NA
## TPR 0.02941 NA
## Precision 1.00000 0.02291
## Recall 0.02941 NA
## G-mean 0.17150 NA
## Weighted Accuracy 0.61176 NA
## F-measure 0.05714 NA
## AUC 0.51471 0.50000
# Error measures per model for the over-sampled training set.
sapply(modelsTest_ubOver, classModelErrors,
       targets = targetsTestClass_p2, beta = 0.4)
## svmRadial nnet rpart rf gbm ada treebag
## TNR 0.99862 0.9510 0.9538 0.9986 0.9910 0.8924 0.9821
## TPR 0.02941 0.2941 0.2941 0.1176 0.1471 0.6471 0.1765
## Precision 0.33333 0.1235 0.1299 0.6667 0.2778 0.1236 0.1875
## Recall 0.02941 0.2941 0.2941 0.1176 0.1471 0.6471 0.1765
## G-mean 0.17138 0.5289 0.5296 0.3428 0.3818 0.7599 0.4163
## Weighted Accuracy 0.61094 0.6883 0.6899 0.6462 0.6534 0.7943 0.6598
## F-measure 0.05405 0.1739 0.1802 0.2000 0.1923 0.2075 0.1818
## AUC 0.51402 0.6226 0.6240 0.5581 0.5690 0.7697 0.5793
## NULL
## TNR 1.00000
## TPR 0.02941
## Precision 1.00000
## Recall 0.02941
## G-mean 0.17150
## Weighted Accuracy 0.61176
## F-measure 0.05714
## AUC 0.51471
# Error measures per model for the ubUnder10 training set.
sapply(modelsTest_ubUnder10, classModelErrors,
       targets = targetsTestClass_p2, beta = 0.4)
## svmRadial nnet rpart rf gbm ada treebag
## TNR 0.99931 0.9945 0.97241 0.99379 0.9814 0.9903 0.9814
## TPR 0.02941 0.1176 0.08824 0.05882 0.1765 0.1471 0.1471
## Precision 0.50000 0.3333 0.06977 0.18182 0.1818 0.2632 0.1562
## Recall 0.02941 0.1176 0.08824 0.05882 0.1765 0.1471 0.1471
## G-mean 0.17144 0.3420 0.29292 0.24178 0.4162 0.3816 0.3799
## Weighted Accuracy 0.61135 0.6437 0.61874 0.61981 0.6594 0.6530 0.6477
## F-measure 0.05556 0.1739 0.07792 0.08889 0.1791 0.1887 0.1515
## AUC 0.51436 0.5561 0.53032 0.52631 0.5789 0.5687 0.5642
## NULL
## TNR 0.01034
## TPR 0.85294
## Precision 0.01981
## Recall 0.85294
## G-mean 0.09393
## Weighted Accuracy 0.34738
## F-measure 0.03872
## AUC 0.56836
# Error measures per model for the ubUnder30 training set.
sapply(modelsTest_ubUnder30, classModelErrors,
       targets = targetsTestClass_p2, beta = 0.4)
## svmRadial nnet rpart rf gbm ada treebag
## TNR 0.83241 0.7800 0.8248 0.8917 0.8483 0.8545 0.8717
## TPR 0.67647 0.5882 0.8529 0.7353 0.7647 0.8235 0.7941
## Precision 0.08647 0.0590 0.1025 0.1374 0.1057 0.1172 0.1268
## Recall 0.67647 0.5882 0.8529 0.7353 0.7647 0.8235 0.7941
## G-mean 0.75040 0.6774 0.8388 0.8097 0.8054 0.8389 0.8320
## Weighted Accuracy 0.77004 0.7033 0.8361 0.8292 0.8148 0.8421 0.8407
## F-measure 0.15333 0.1072 0.1830 0.2315 0.1857 0.2051 0.2186
## AUC 0.75444 0.6841 0.8389 0.8135 0.8065 0.8390 0.8329
## NULL
## TNR 0.196552
## TPR 0.323529
## Precision 0.009354
## Recall 0.323529
## G-mean 0.252171
## Weighted Accuracy 0.247343
## F-measure 0.018182
## AUC 0.739959
# Error measures per model for the SMOTE training set.
sapply(modelsTest_SMOTE, classModelErrors,
       targets = targetsTestClass_p2, beta = 0.4)
## svmRadial nnet rpart rf gbm ada treebag
## TNR 0.98138 0.8993 0.9255 0.9317 0.9372 0.9166 0.9179
## TPR 0.08824 0.5294 0.4118 0.5294 0.3529 0.7353 0.5882
## Precision 0.10000 0.1098 0.1148 0.1538 0.1165 0.1712 0.1439
## Recall 0.08824 0.5294 0.4118 0.5294 0.3529 0.7353 0.5882
## G-mean 0.29427 0.6900 0.6173 0.7023 0.5751 0.8209 0.7348
## Weighted Accuracy 0.62412 0.7514 0.7200 0.7708 0.7035 0.8440 0.7861
## F-measure 0.09375 0.1818 0.1795 0.2384 0.1752 0.2778 0.2312
## AUC 0.53481 0.7144 0.6686 0.7306 0.6451 0.8259 0.7531
## NULL
## TNR 0.02207
## TPR 0.91176
## Precision 0.02139
## Recall 0.91176
## G-mean 0.14185
## Weighted Accuracy 0.37795
## F-measure 0.04181
## AUC 0.53308
# xtable(sapply(modelsTest_previous,function(x) classModelErrors(x,targetsTestClass_p2,0.4)),digits=c(0,3,3,3,3,3,3,3,3))
# xtable(sapply(modelsTest_ubOver,function(x) classModelErrors(x,targetsTestClass_p2,0.4)),digits=c(0,3,3,3,3,3,3,3,3),caption="over sampling")
# xtable(sapply(modelsTest_ubUnder10,function(x) classModelErrors(x,targetsTestClass_p2,0.4)),digits=c(0,3,3,3,3,3,3,3,3))
# xtable(sapply(modelsTest_ubUnder30,function(x) classModelErrors(x,targetsTestClass_p2,0.4)),digits=c(0,3,3,3,3,3,3,3,3))
# xtable(sapply(modelsTest_SMOTE,function(x) classModelErrors(x,targetsTestClass_p2,0.4)),digits=c(0,3,3,3,3,3,3,3,3),caption="SMOTE")
Variable importance
# Unscaled variable importance of the rpart, rf, gbm and treebag fits trained
# on the SMOTE data. Each importance table is converted to a matrix so the
# tables can be merged by row name below.
load(paste("dataset_", "_TW_SMOTE_", "_rpartFit.RData"))
rpartImp <- as.matrix(varImp(rpartFit, scale = FALSE)$importance)
## Loading required package: rpart
load(paste("dataset_", "_TW_SMOTE_", "_rfFit.RData"))
rfImp <- as.matrix(varImp(rfFit, scale = FALSE)$importance)
load(paste("dataset_", "_TW_SMOTE_", "_gbmFit.RData"))
gbmImp <- as.matrix(varImp(gbmFit, scale = FALSE)$importance)
## Loading required package: plyr
load(paste("dataset_", "_TW_SMOTE_", "_treebagFit.RData"))
treebagImp <- as.matrix(varImp(treebagFit, scale = FALSE)$importance)
## Loading required package: ipred
############# combining ##################
# Full outer joins on the variable name (all = TRUE), so a variable missing
# from one model's table is kept with NA for that model.
rpart_rf_imp <- merge(rpartImp, rfImp, by = "row.names", all = TRUE)
rpart_rf_gmb_imp <- merge(
  rpart_rf_imp, gbmImp,
  by.x = "Row.names", by.y = "row.names", all = TRUE
)
rpart_rf_gmb_treebag_imp <- merge(
  rpart_rf_gmb_imp, treebagImp,
  by.x = "Row.names", by.y = "row.names", all = TRUE
)
## Warning: column names 'Overall.x', 'Overall.y' are duplicated in the
## result
# The duplicated merge suffixes are overwritten here with one name per model.
colnames(rpart_rf_gmb_treebag_imp) <- c(
  "variables", "CART", "RF", "GBM", "bagging"
)
imp <- as.matrix(
  rpart_rf_gmb_treebag_imp[, c("CART", "RF", "GBM", "bagging")]
)
rownames(imp) <- rpart_rf_gmb_treebag_imp[["variables"]]
# Sort rows by CART importance, breaking ties by RF, then GBM, then bagging.
imp.order <- imp[
  order(imp[, "CART"], imp[, "RF"], imp[, "GBM"], imp[, "bagging"]),
]
################ draw the plot of variable importance using lattice package ###########
# pdf(file="imp_SMOTE_TW.pdf",width=14 ,height=10)
dotplot(
  imp.order,
  groups = FALSE,
  mar = rep(0, 4),
  transparent = TRUE,
  layout = c(2, 2),
  aspect = 0.7,
  origin = 0,
  type = c("p", "h"),
  xlab = "Variable importance"
)
# dev.off()