# Divide the data into three parts: ensemble-training, blender-training, and testing sets
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(ipred)
# Load the built-in iris data set.
data(iris)

# Label / feature views of the original, unshuffled data. They are taken
# before the shuffle below, so their row order does not match the shuffled
# `iris`; nothing in the visible part of this script reads them afterwards.
irisClass <- iris[, 5]
irisData <- iris[, -5]

# Fix the RNG seed so the shuffle, and therefore the three partitions,
# can be reproduced from run to run.
set.seed(1234)
iris <- iris[sample(nrow(iris)), ]

# Size of each of the first two partitions; the third takes the remainder.
split <- floor(nrow(iris) / 3)

# seq_len() rather than 0:split: index 0 is silently dropped by `[`, so the
# selected rows are the same, but the intent (rows 1..split) is explicit.
ensembleData <- iris[seq_len(split), ]
blenderData <- iris[(split + 1):(split * 2), ]
testingData <- iris[(split * 2 + 1):nrow(iris), ]
# Resampling scheme passed to every train() call in this script: 3-fold
# cross-validation, with returnResamp set to "none".
myControl <- trainControl(
  method = "cv",
  number = 3,
  returnResamp = "none"
)
# Train all the ensemble (base) models with ensembleData
# Base learner 1: bagged trees ("treebag"), fitted on the ensemble partition.
treebagModel <- train(
  Species ~ .,
  data = ensembleData,
  method = "treebag",
  trControl = myControl
)
## Loading required package: plyr
## Loading required package: e1071

# Base learner 2: a single rpart tree, fitted on the same partition.
rpartModel <- train(
  Species ~ .,
  data = ensembleData,
  method = "rpart",
  trControl = myControl
)
## Loading required package: rpart

# Base learner 3: random forest with mtry fixed at 3 via tuneGrid.
# - The forest-size argument of randomForest() is `ntree`; the previous
#   spelling `ntrees` did not match it, so the requested 2000 trees were
#   never applied.
# - The misspelled `tunelength` argument is dropped: an explicit tuneGrid is
#   supplied, so there is no grid size for tuneLength to control.
rfModel <- train(
  Species ~ .,
  data = ensembleData,
  method = "rf",
  tuneGrid = data.frame(.mtry = 3),
  ntree = 2000,
  importance = TRUE,
  trControl = myControl
)
## Loading required package: randomForest
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
# Get predictions from each ensemble model for testingData and add them back to testingData
# Column groups, referenced by name instead of by position so the code does
# not depend on the exact column order of the data frames.
featureCols <- setdiff(names(iris), "Species")
stackCols <- c("treebag_PROB", "rpart_PROB", "rf_PROB")

# Base-model predictions for the blender partition. These columns are what
# lets the final model learn from the base models; without them the
# "blender" is trained on the raw features only and no stacking takes place.
# NOTE: despite the `_PROB` suffix, predict() is called without a `type`
# argument here, so these columns hold predicted class labels.
blenderData$treebag_PROB <- predict(treebagModel, blenderData[, featureCols])
blenderData$rpart_PROB <- predict(rpartModel, blenderData[, featureCols])
blenderData$rf_PROB <- predict(rfModel, blenderData[, featureCols])

# The same three prediction columns for the testing partition.
testingData$treebag_PROB <- predict(treebagModel, testingData[, featureCols])
testingData$rpart_PROB <- predict(rpartModel, testingData[, featureCols])
testingData$rf_PROB <- predict(rfModel, testingData[, featureCols])

# Train the final model with the blender data
# Predictor frame for the test set: raw features plus base-model predictions.
testingDataPredictors1 <- testingData[, featureCols]
testingDataPredictors2 <- testingData[, stackCols]
testingDataPredictor <- cbind(testingDataPredictors1, testingDataPredictors2)
testingDataClass <- testingData[, "Species"]

# Fit the blender on the raw features AND the base-model predictions of the
# blender partition, so that it has the same predictors at training time as
# it is given at prediction time.
final_blender_model <- train(
  Species ~ .,
  data = blenderData[, c(featureCols, stackCols, "Species")],
  method = "rf",
  trControl = myControl
)

# Evaluate the blended model on the held-out testing partition.
preds <- predict(final_blender_model, testingDataPredictor)
confusionMatrix(preds, testingDataClass)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 13 2
## virginica 0 1 19
##
## Overall Statistics
##
## Accuracy : 0.94
## 95% CI : (0.8345, 0.9875)
## No Information Rate : 0.42
## P-Value [Acc > NIR] : 7.853e-15
##
## Kappa : 0.9088
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0 0.9286 0.9048
## Specificity 1.0 0.9444 0.9655
## Pos Pred Value 1.0 0.8667 0.9500
## Neg Pred Value 1.0 0.9714 0.9333
## Prevalence 0.3 0.2800 0.4200
## Detection Rate 0.3 0.2600 0.3800
## Detection Prevalence 0.3 0.3000 0.4000
## Balanced Accuracy 1.0 0.9365 0.9351
# Individual models
# Confusion matrix of the treebag predictions against the true test labels.
confusionMatrix(
  data = testingData[["treebag_PROB"]],
  reference = testingDataClass
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 14 1
## virginica 0 0 20
##
## Overall Statistics
##
## Accuracy : 0.98
## 95% CI : (0.8935, 0.9995)
## No Information Rate : 0.42
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9696
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0 1.0000 0.9524
## Specificity 1.0 0.9722 1.0000
## Pos Pred Value 1.0 0.9333 1.0000
## Neg Pred Value 1.0 1.0000 0.9667
## Prevalence 0.3 0.2800 0.4200
## Detection Rate 0.3 0.2800 0.4000
## Detection Prevalence 0.3 0.3000 0.4000
## Balanced Accuracy 1.0 0.9861 0.9762
# Confusion matrix of the rpart predictions against the true test labels.
confusionMatrix(
  data = testingData[["rpart_PROB"]],
  reference = testingDataClass
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 14 1
## virginica 0 0 20
##
## Overall Statistics
##
## Accuracy : 0.98
## 95% CI : (0.8935, 0.9995)
## No Information Rate : 0.42
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9696
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0 1.0000 0.9524
## Specificity 1.0 0.9722 1.0000
## Pos Pred Value 1.0 0.9333 1.0000
## Neg Pred Value 1.0 1.0000 0.9667
## Prevalence 0.3 0.2800 0.4200
## Detection Rate 0.3 0.2800 0.4000
## Detection Prevalence 0.3 0.3000 0.4000
## Balanced Accuracy 1.0 0.9861 0.9762
# Confusion matrix of the random-forest predictions against the true test labels.
confusionMatrix(
  data = testingData[["rf_PROB"]],
  reference = testingDataClass
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 14 1
## virginica 0 0 20
##
## Overall Statistics
##
## Accuracy : 0.98
## 95% CI : (0.8935, 0.9995)
## No Information Rate : 0.42
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9696
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0 1.0000 0.9524
## Specificity 1.0 0.9722 1.0000
## Pos Pred Value 1.0 0.9333 1.0000
## Neg Pred Value 1.0 1.0000 0.9667
## Prevalence 0.3 0.2800 0.4200
## Detection Rate 0.3 0.2800 0.4000
## Detection Prevalence 0.3 0.3000 0.4000
## Balanced Accuracy 1.0 0.9861 0.9762