#Loading the required libraries
library('caret')
## Loading required package: lattice
## Loading required package: ggplot2
#Setting the random seed
set.seed(1)
#Loading the hackathon dataset
train <- read.csv("Train.csv")
test <- read.csv("Test.csv")
train$IsGoodNews <- factor(train$IsGoodNews, levels = c(0,1), labels = c("No", "Yes"))
#Does the data contain missing values?
sum(is.na(train))
## [1] 0
#Centering and scaling the data (medianImpute is kept as a safeguard, though this data has no missing values)
preProcValues <- preProcess(train, method = c("medianImpute","center","scale"))
#RANN is only required by caret's knnImpute method; loaded here in case you switch imputation methods
library('RANN')
data_processed <- predict(preProcValues, train)
sum(is.na(data_processed))
## [1] 0
#Defining the training controls for multiple models
fitControl <- trainControl(
  method = "cv",
  number = 5,
  savePredictions = 'final',
  classProbs = TRUE)
#Splitting the training set into two parts based on outcome: 75% and 25%
index <- createDataPartition(data_processed$IsGoodNews, p=0.75, list=FALSE)
trainSet <- data_processed[ index,]
testSet <- data_processed[-index,]
#Defining the predictors and outcome
predictors <- c(paste0('Freq_Of_Word_', 1:50),
                'LengthOFFirstParagraph', 'StylizedLetters', 'TotalEmojiCharacters')
outcomeName <- 'IsGoodNews'
Now let’s get started by training a random forest and testing its accuracy on the test set that we created:
set.seed(42)
#Training the random forest model
model_rf <- train(trainSet[, predictors], trainSet[, outcomeName], method = 'rf',
trControl = fitControl, tuneLength = 3)
#Predicting using random forest model
testSet$pred_rf <- predict(object = model_rf, newdata = testSet[, predictors])
#Checking the accuracy of the random forest model
confusionMatrix(testSet$IsGoodNews, testSet$pred_rf)
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 138 6
## Yes 16 76
##
## Accuracy : 0.9068
## 95% CI : (0.8623, 0.9407)
## No Information Rate : 0.6525
## P-Value [Acc > NIR] : < 2e-16
##
## Kappa : 0.8001
##
## Mcnemar's Test P-Value : 0.05501
##
## Sensitivity : 0.8961
## Specificity : 0.9268
## Pos Pred Value : 0.9583
## Neg Pred Value : 0.8261
## Prevalence : 0.6525
## Detection Rate : 0.5847
## Detection Prevalence : 0.6102
## Balanced Accuracy : 0.9115
##
## 'Positive' Class : No
##
Well, as you can see, we got an accuracy of 0.91 with the individual random forest model. Let’s see how KNN performs:
set.seed(42)
#Training the knn model
model_knn <- train(trainSet[, predictors], trainSet[, outcomeName],
method = 'knn', trControl = fitControl, tuneLength = 3)
#Predicting using the knn model
testSet$pred_knn <- predict(object = model_knn, testSet[, predictors])
#Checking the accuracy of the knn model
confusionMatrix(testSet$IsGoodNews, testSet$pred_knn)
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 137 7
## Yes 22 70
##
## Accuracy : 0.8771
## 95% CI : (0.8283, 0.9161)
## No Information Rate : 0.6737
## P-Value [Acc > NIR] : 4.205e-13
##
## Kappa : 0.7339
##
## Mcnemar's Test P-Value : 0.00933
##
## Sensitivity : 0.8616
## Specificity : 0.9091
## Pos Pred Value : 0.9514
## Neg Pred Value : 0.7609
## Prevalence : 0.6737
## Detection Rate : 0.5805
## Detection Prevalence : 0.6102
## Balanced Accuracy : 0.8854
##
## 'Positive' Class : No
##
We get an accuracy of 0.88 with the individual KNN model. Let’s check the performance of logistic regression as well before we go on to create an ensemble of the three.
set.seed(42)
#Training the Logistic regression model
model_lr <- train(trainSet[, predictors], trainSet[, outcomeName],
method = 'glm', trControl = fitControl, tuneLength = 3)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
#Predicting using the logistic regression model
testSet$pred_lr <- predict(object = model_lr, testSet[, predictors])
#Checking the accuracy of the logistic regression model
confusionMatrix(testSet$IsGoodNews, testSet$pred_lr)
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 137 7
## Yes 9 83
##
## Accuracy : 0.9322
## 95% CI : (0.8922, 0.9608)
## No Information Rate : 0.6186
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.8569
##
## Mcnemar's Test P-Value : 0.8026
##
## Sensitivity : 0.9384
## Specificity : 0.9222
## Pos Pred Value : 0.9514
## Neg Pred Value : 0.9022
## Prevalence : 0.6186
## Detection Rate : 0.5805
## Detection Prevalence : 0.6102
## Balanced Accuracy : 0.9303
##
## 'Positive' Class : No
##
And logistic regression gives us an accuracy of 0.93.
Now, let’s try out different ways of forming an ensemble with these models, as we discussed earlier:
#Predicting the probabilities
testSet$pred_rf_prob <- predict(object = model_rf, testSet[, predictors], type = 'prob')
testSet$pred_knn_prob <- predict(object = model_knn, testSet[, predictors], type = 'prob')
testSet$pred_lr_prob <- predict(object = model_lr, testSet[, predictors], type = 'prob')
#Taking the average of the predicted probabilities
testSet$pred_avg <- (testSet$pred_rf_prob$Yes + testSet$pred_knn_prob$Yes + testSet$pred_lr_prob$Yes) / 3
#Splitting into binary classes at 0.5
testSet$pred_avg <- as.factor(ifelse(testSet$pred_avg > 0.5, 'Yes', 'No'))
#The majority vote
testSet$pred_majority <- as.factor(
  ifelse(testSet$pred_rf == 'Yes' & testSet$pred_knn == 'Yes', 'Yes',
  ifelse(testSet$pred_rf == 'Yes' & testSet$pred_lr == 'Yes', 'Yes',
  ifelse(testSet$pred_knn == 'Yes' & testSet$pred_lr == 'Yes', 'Yes', 'No'))))
#Taking weighted average of predictions
testSet$pred_weighted_avg <- (testSet$pred_rf_prob$Yes*0.25) + (testSet$pred_knn_prob$Yes*0.25) + (testSet$pred_lr_prob$Yes*0.5)
#Splitting into binary classes at 0.5
testSet$pred_weighted_avg <- as.factor(ifelse(testSet$pred_weighted_avg > 0.5, 'Yes', 'No'))
Now let’s check the accuracy of the three ensembling methods on the test set:
# Confusion Matrix for Average
confusionMatrix(testSet$IsGoodNews, testSet$pred_avg)
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 141 3
## Yes 11 81
##
## Accuracy : 0.9407
## 95% CI : (0.9025, 0.9672)
## No Information Rate : 0.6441
## P-Value [Acc > NIR] : < 2e-16
##
## Kappa : 0.8733
##
## Mcnemar's Test P-Value : 0.06137
##
## Sensitivity : 0.9276
## Specificity : 0.9643
## Pos Pred Value : 0.9792
## Neg Pred Value : 0.8804
## Prevalence : 0.6441
## Detection Rate : 0.5975
## Detection Prevalence : 0.6102
## Balanced Accuracy : 0.9460
##
## 'Positive' Class : No
##
# Confusion Matrix for Voting
confusionMatrix(testSet$IsGoodNews, testSet$pred_majority)
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 139 5
## Yes 10 82
##
## Accuracy : 0.9364
## 95% CI : (0.8973, 0.964)
## No Information Rate : 0.6314
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.8651
##
## Mcnemar's Test P-Value : 0.3017
##
## Sensitivity : 0.9329
## Specificity : 0.9425
## Pos Pred Value : 0.9653
## Neg Pred Value : 0.8913
## Prevalence : 0.6314
## Detection Rate : 0.5890
## Detection Prevalence : 0.6102
## Balanced Accuracy : 0.9377
##
## 'Positive' Class : No
##
# Confusion Matrix for Weighted Average
confusionMatrix(testSet$IsGoodNews, testSet$pred_weighted_avg)
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 139 5
## Yes 10 82
##
## Accuracy : 0.9364
## 95% CI : (0.8973, 0.964)
## No Information Rate : 0.6314
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.8651
##
## Mcnemar's Test P-Value : 0.3017
##
## Sensitivity : 0.9329
## Specificity : 0.9425
## Pos Pred Value : 0.9653
## Neg Pred Value : 0.8913
## Prevalence : 0.6314
## Detection Rate : 0.5890
## Detection Prevalence : 0.6102
## Balanced Accuracy : 0.9377
##
## 'Positive' Class : No
##
Before proceeding further, I’d like you to recall the two important criteria we discussed earlier, both of which must be fulfilled: the individual models should be accurate, and their predictions should not be highly correlated. In the ensembles above, I skipped checking the correlation between the predictions of the three models; I chose them simply to demonstrate the concepts. If their predictions are highly correlated, combining these three might not give better results than the individual models. But you get the point, right?
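If you want to run this check yourself, here is a minimal sketch using the predicted probabilities we computed above; pairwise correlations close to 1 would suggest the models are largely redundant and will add little to an ensemble:
#Correlation between the predicted probabilities of the base models
cor(data.frame(rf = testSet$pred_rf_prob$Yes,
               knn = testSet$pred_knn_prob$Yes,
               lr = testSet$pred_lr_prob$Yes))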
So far, we have used simple formulas at the top layer. Instead, we can use another machine learning model there, which is essentially what stacking is. For a regression problem, we can use linear regression as the top layer to learn a linear formula mapping the bottom layer models’ predictions to the outcome; for a classification problem, logistic regression plays the same role.
Moreover, we don’t need to restrict ourselves to linear models: we can also use more complex models such as GBM or neural networks to learn a non-linear mapping from the bottom layer predictions to the outcome.
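To make the linear formula concrete, here is a minimal sketch of such a top layer built by hand with base R’s glm. It assumes the out-of-fold prediction columns OOF_pred_rf, OOF_pred_knn and OOF_pred_lr that we create further below, so run it only after those exist:
#A hand-rolled stacking top layer: logistic regression on the base models' out-of-fold probabilities
stack_lr <- glm(IsGoodNews ~ OOF_pred_rf + OOF_pred_knn + OOF_pred_lr,
                data = trainSet, family = binomial)
#The fitted coefficients act as learned weights for the base models
summary(stack_lr)
Training with caret’s method = 'glm', as we do below, fits essentially the same model while keeping the resampling machinery consistent.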
On the same example, let’s try applying logistic regression and GBM as the top layer models. Remember the following steps that we’ll take:
Step 1: Train the individual base layer models on the training data.
Step 2: Predict using each base layer model for the training data and the test data.
Step 3: Train the top layer model on the predictions the base layer models made for the training data.
Step 4: Predict with the top layer model using the predictions the base layer models made for the test data.
One extremely important thing to note in step 2 is that you should always make out-of-fold predictions for the training data; otherwise, the importance of each base layer model will only reflect how well it can recall the training data.
Most of these steps were already performed above, but I’ll walk you through them one by one again.
#Defining the training control
fitControl <- trainControl(
  method = "cv",
  number = 10,
  savePredictions = 'final', # to save the out-of-fold predictions for the best parameter combination
  classProbs = TRUE # to save the class probabilities of the out-of-fold predictions
)
#Training the random forest model
model_rf <- train(trainSet[, predictors], trainSet[, outcomeName],
method = 'rf', trControl = fitControl, tuneLength=3)
#Training the knn model
model_knn <- train(trainSet[, predictors], trainSet[, outcomeName],
method = 'knn', trControl = fitControl, tuneLength=3)
#Training the logistic regression model
model_lr <- train(trainSet[, predictors], trainSet[, outcomeName],
method = 'glm', trControl = fitControl, tuneLength = 3)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
#Predicting the out of fold prediction probabilities for training data
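#caret stores each observation's held-out-fold prediction in model$pred;
#ordering by rowIndex restores the original row order of trainSet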
trainSet$OOF_pred_rf <- model_rf$pred$Yes[order(model_rf$pred$rowIndex)]
trainSet$OOF_pred_knn <- model_knn$pred$Yes[order(model_knn$pred$rowIndex)]
trainSet$OOF_pred_lr <- model_lr$pred$Yes[order(model_lr$pred$rowIndex)]
#Predicting probabilities for the test data
testSet$OOF_pred_rf <- predict(model_rf, testSet[predictors], type = 'prob')$Yes
testSet$OOF_pred_knn <- predict(model_knn, testSet[predictors], type = 'prob')$Yes
testSet$OOF_pred_lr <- predict(model_lr, testSet[predictors], type = 'prob')$Yes
#Predictors for top layer models
predictors_top <- c('OOF_pred_rf', 'OOF_pred_knn', 'OOF_pred_lr')
#GBM as top layer model (verbose = FALSE suppresses gbm's long iteration log)
model_gbm <- train(trainSet[, predictors_top], trainSet[, outcomeName],
method = 'gbm', trControl = fitControl, tuneLength = 3, verbose = FALSE)
Similarly, we can create an ensemble with logistic regression as the top layer model as well.
#Logistic regression as top layer model
model_glm <- train(trainSet[, predictors_top], trainSet[, outcomeName],
method = 'glm', trControl = fitControl, tuneLength = 3)
#Predicting using the GBM top layer model
testSet$gbm_stacked <- predict(model_gbm, testSet[, predictors_top])
#Predicting using the logistic regression top layer model
testSet$glm_stacked <- predict(model_glm, testSet[, predictors_top])
#Accuracy of GBM top layer model
confusionMatrix(testSet$IsGoodNews, testSet$gbm_stacked)
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 140 4
## Yes 8 84
##
## Accuracy : 0.9492
## 95% CI : (0.9129, 0.9735)
## No Information Rate : 0.6271
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.8923
##
## Mcnemar's Test P-Value : 0.3865
##
## Sensitivity : 0.9459
## Specificity : 0.9545
## Pos Pred Value : 0.9722
## Neg Pred Value : 0.9130
## Prevalence : 0.6271
## Detection Rate : 0.5932
## Detection Prevalence : 0.6102
## Balanced Accuracy : 0.9502
##
## 'Positive' Class : No
##
#Accuracy of logistic regression top layer model
confusionMatrix(testSet$IsGoodNews, testSet$glm_stacked)
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 138 6
## Yes 9 83
##
## Accuracy : 0.9364
## 95% CI : (0.8973, 0.964)
## No Information Rate : 0.6229
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.8656
##
## Mcnemar's Test P-Value : 0.6056
##
## Sensitivity : 0.9388
## Specificity : 0.9326
## Pos Pred Value : 0.9583
## Neg Pred Value : 0.9022
## Prevalence : 0.6229
## Detection Rate : 0.5847
## Detection Prevalence : 0.6102
## Balanced Accuracy : 0.9357
##
## 'Positive' Class : No
##
Great! You have built your first ensemble.
Note that it’s really important to choose the models for the ensemble wisely to get the best out of it. The two rules of thumb that we discussed will greatly help you with that.
Ensembling is a popular and effective technique that data scientists frequently use to beat the accuracy benchmarks of even the best individual algorithms; more often than not, it’s the winning recipe in hackathons. The more you use ensembling, the more you’ll admire its beauty.