Ensemble Models for Prediction

Introduction

This notebook introduces you to the notion of ensemble models for prediction. As the word suggests, this refers to a group or a collection of models. The basic idea is that by combining multiple models we can reduce the error (bias/variance) of any one model and possibly increase predictive accuracy. In this notebook we will be looking at such models for classification tasks (which is their most popular application), although they are also used for continuous predictions. We will be looking at a variety of models: Decision Trees, Bagging, Random Forests, and Boosting.

Loading packages we’ll need

#Installs required packages
if(!require(readr)) install.packages("readr",repos = "http://cran.us.r-project.org")
if(!require(lubridate)) install.packages("lubridate",repos = "http://cran.us.r-project.org")
if(!require(rpart)) install.packages("rpart",repos = "http://cran.us.r-project.org")
if(!require(rpart.plot)) install.packages("rpart.plot",repos = "http://cran.us.r-project.org") # for decision trees
if(!require(C50)) install.packages("C50",repos = "http://cran.us.r-project.org") # for boosting
if(!require(randomForest)) install.packages("randomForest",repos = "http://cran.us.r-project.org") # for bagging & random forest
if(!require(caret)) install.packages("caret",repos = "http://cran.us.r-project.org")
if(!require(ROCit)) install.packages("ROCit",repos = "http://cran.us.r-project.org")
if(!require(pscl)) install.packages("pscl",repos = "http://cran.us.r-project.org")

library(readr)
library(lubridate)
library(rpart)
library(rpart.plot)
library(C50)
library(randomForest)
library(caret)
library(ROCit)
library(pscl)


set.seed(1234) # setting seed for random draws

Loading and reading data

library(readr)
cs.df <- read.csv("/Users/ayandacollins/Desktop/MISDI/Marketing Analytics/11488_Marketing Analytics Final Exam/moviedata.csv")

# View data frame
#View(cs.df)

# Getting an overview of the data
summary(cs.df)

##  Opening_Income       Gross_Income            ROI            ReleaseTime    
##  Min.   :     2211   Min.   :     2421   Min.   : -0.9997   Min.   :0.0000  
##  1st Qu.:  2798229   1st Qu.: 10166820   1st Qu.: -0.4530   1st Qu.:0.0000  
##  Median : 11203815   Median : 33562069   Median :  0.1117   Median :0.0000  
##  Mean   : 25368428   Mean   : 76619542   Mean   :  1.9934   Mean   :0.4207  
##  3rd Qu.: 23762435   3rd Qu.: 72679278   3rd Qu.:  1.4035   3rd Qu.:1.0000  
##  Max.   :357115007   Max.   :936662225   Max.   :226.6441   Max.   :1.0000  
##     Genre               MPAA             Production      Distribution   
##  Length:309         Length:309         Min.   :0.0000   Min.   :0.0000  
##  Class :character   Class :character   1st Qu.:0.0000   1st Qu.:0.0000  
##  Mode  :character   Mode  :character   Median :0.0000   Median :1.0000  
##                                        Mean   :0.2104   Mean   :0.5146  
##                                        3rd Qu.:0.0000   3rd Qu.:1.0000  
##                                        Max.   :1.0000   Max.   :1.0000  
##   PriorPopular        Budget             Screens      PublicRating  
##  Min.   :0.0000   Min.   :    10000   Min.   :   1   Min.   :3.000  
##  1st Qu.:0.0000   1st Qu.: 10000000   1st Qu.:1656   1st Qu.:5.600  
##  Median :1.0000   Median : 28000000   Median :2934   Median :6.300  
##  Mean   :0.5405   Mean   : 54159460   Mean   :2551   Mean   :6.241  
##  3rd Qu.:1.0000   3rd Qu.: 62000000   3rd Qu.:3585   3rd Qu.:6.900  
##  Max.   :1.0000   Max.   :356000000   Max.   :4662   Max.   :8.500  
##   CriticRating        Votes           Reviews          Duration    
##  Min.   :0.0000   Min.   :   100   Min.   :  10.0   Min.   : 79.0  
##  1st Qu.:0.2900   1st Qu.: 18219   1st Qu.: 269.0   1st Qu.: 96.0  
##  Median :0.5500   Median : 51861   Median : 509.0   Median :106.0  
##  Mean   :0.5496   Mean   :108324   Mean   : 771.2   Mean   :108.7  
##  3rd Qu.:0.8300   3rd Qu.:115603   3rd Qu.: 898.0   3rd Qu.:119.0  
##  Max.   :1.0000   Max.   :845841   Max.   :8833.0   Max.   :181.0

#Examining the structure of the data
str(cs.df)

## 'data.frame':    309 obs. of  16 variables:
##  $ Opening_Income: int  196496 159615 66510 38560195 16628170 96250 46607250 14869736 1424758 25369 ...
##  $ Gross_Income  : int  6758416 7751969 3902185 100407760 46009673 538460 114581250 100014699 48791187 34694 ...
##  $ ROI           : num  -0.155 -0.139 -0.721 0.826 0.394 ...
##  $ ReleaseTime   : int  0 0 1 0 1 1 0 1 0 0 ...
##  $ Genre         : chr  "Romance" "Romance" "Romance" "Romance" ...
##  $ MPAA          : chr  "PG-13" "PG-13" "PG-13" "R" ...
##  $ Production    : int  0 0 0 0 1 1 1 1 0 0 ...
##  $ Distribution  : int  0 0 0 1 1 1 1 1 1 0 ...
##  $ PriorPopular  : int  1 1 1 1 0 0 1 0 0 0 ...
##  $ Budget        : int  8000000 9000000 14000000 55000000 33000000 10000000 55000000 110000000 31000000 8000000 ...
##  $ Screens       : int  870 572 317 3768 3008 142 3714 3478 3444 35 ...
##  $ PublicRating  : num  7.7 7 6.8 4.5 6 5.3 4.6 7 5.9 6.3 ...
##  $ CriticRating  : num  0.81 0.89 0.84 0.12 0.37 0.35 0.11 0.31 0.7 0.44 ...
##  $ Votes         : int  115324 28970 12685 46819 77144 16798 84408 320746 51520 43701 ...
##  $ Reviews       : int  531 387 241 409 326 257 531 1505 490 228 ...
##  $ Duration      : int  105 123 111 105 98 122 118 116 89 95 ...

Data Cleaning

We start by constructing our dependent variable, a binary indicator of success/failure. If it is greater than 0 make it 1.

#Construct a binary indicator of success
cs.df$win <- as.factor(cs.df$ROI >= 1)
summary(cs.df$win)

## FALSE  TRUE 
##   214    95

The cs.df dataframe is the final dataset we will use for prediction. As always, we will first create training and test datasets.

# create training and test data
ind <- sample(1:nrow(cs.df), floor(nrow(cs.df)*0.60))
cs.train.df <- cs.df[ind, ]
cs.test.df <- cs.df[-ind, ]

summary(cs.train.df$win)

## FALSE  TRUE 
##   127    58

summary(cs.test.df$win)

## FALSE  TRUE 
##    87    37

Logit model

# run a logistic regression on the training data
logit.regr <- glm(win ~ ReleaseTime+Genre+MPAA+Production+Distribution+PriorPopular+Opening_Income+Budget+Screens+Duration+Budget+PublicRating+CriticRating+Votes+Reviews, data = cs.train.df, family = "binomial")

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

summary(logit.regr)

## 
## Call:
## glm(formula = win ~ ReleaseTime + Genre + MPAA + Production + 
##     Distribution + PriorPopular + Opening_Income + Budget + Screens + 
##     Duration + Budget + PublicRating + CriticRating + Votes + 
##     Reviews, family = "binomial", data = cs.train.df)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.86406  -0.33107  -0.01747   0.04497   3.10416  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -1.379e+01  1.570e+03  -0.009   0.9930    
## ReleaseTime     1.079e-01  6.565e-01   0.164   0.8694    
## GenreAdventure  4.136e+00  1.621e+00   2.552   0.0107 *  
## GenreComedy     8.756e-01  1.362e+00   0.643   0.5203    
## GenreDrama      7.370e-01  1.315e+00   0.560   0.5752    
## GenreHorror    -2.443e-01  1.530e+00  -0.160   0.8732    
## GenreRomance    1.272e-01  1.336e+00   0.095   0.9242    
## GenreThriller   5.284e-01  1.411e+00   0.375   0.7080    
## MPAAPG          1.732e+01  1.570e+03   0.011   0.9912    
## MPAAPG-13       1.755e+01  1.570e+03   0.011   0.9911    
## MPAAR           1.605e+01  1.570e+03   0.010   0.9918    
## Production     -1.055e-01  1.148e+00  -0.092   0.9268    
## Distribution   -3.706e-01  9.219e-01  -0.402   0.6877    
## PriorPopular   -2.349e-01  7.631e-01  -0.308   0.7582    
## Opening_Income  2.930e-07  6.496e-08   4.510 6.47e-06 ***
## Budget         -2.503e-07  5.427e-08  -4.612 4.00e-06 ***
## Screens         9.818e-04  3.947e-04   2.487   0.0129 *  
## Duration       -4.900e-02  2.907e-02  -1.686   0.0919 .  
## PublicRating   -1.898e-01  5.256e-01  -0.361   0.7181    
## CriticRating    1.775e-01  1.723e+00   0.103   0.9179    
## Votes          -1.402e-06  4.437e-06  -0.316   0.7521    
## Reviews         4.672e-03  1.165e-03   4.010 6.07e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 230.097  on 184  degrees of freedom
## Residual deviance:  86.129  on 163  degrees of freedom
## AIC: 130.13
## 
## Number of Fisher Scoring iterations: 15

Get goodness-of-fit results. If the project was featured on the main page then it increases significantly the likelihood that the project is funded. If you enable paypal or do all or nothing it enhances the likelihood of getting funded significantly.

# get LL and pseudo r2
library(pscl)
pR2(logit.regr)

## fitting null model for pseudo-r2

##          llh      llhNull           G2     McFadden         r2ML         r2CU 
##  -43.0644640 -115.0483730  143.9678181    0.6256839    0.5407701    0.7598249

As before, we’ll rerun the logit regression after removing the non-significant variables

# logit on clean training data
logit.regr.clean <- glm(win ~ Genre+Opening_Income+Budget+Screens+Duration+Reviews, 
                  data = cs.train.df, family = "binomial")

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

summary(logit.regr.clean)

## 
## Call:
## glm(formula = win ~ Genre + Opening_Income + Budget + Screens + 
##     Duration + Reviews, family = "binomial", data = cs.train.df)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.87894  -0.38327  -0.05238   0.05940   3.07921  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     2.184e+00  2.651e+00   0.824  0.40993    
## GenreAdventure  3.409e+00  1.302e+00   2.619  0.00883 ** 
## GenreComedy     8.637e-01  1.135e+00   0.761  0.44656    
## GenreDrama      9.069e-01  1.082e+00   0.838  0.40211    
## GenreHorror    -4.335e-01  1.324e+00  -0.327  0.74329    
## GenreRomance    5.393e-01  1.120e+00   0.482  0.63007    
## GenreThriller   6.527e-01  1.129e+00   0.578  0.56305    
## Opening_Income  2.468e-07  4.866e-08   5.071 3.97e-07 ***
## Budget         -2.108e-07  4.047e-08  -5.209 1.90e-07 ***
## Screens         9.027e-04  2.832e-04   3.188  0.00143 ** 
## Duration       -5.488e-02  2.612e-02  -2.101  0.03566 *  
## Reviews         4.084e-03  9.111e-04   4.482 7.40e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 230.097  on 184  degrees of freedom
## Residual deviance:  93.224  on 173  degrees of freedom
## AIC: 117.22
## 
## Number of Fisher Scoring iterations: 8

Let’s use the logit model above to predict on the test data.

# predict on the test data
logit.test.pred <- predict(logit.regr.clean, cs.test.df, type = "response")
summary(logit.test.pred)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.000000 0.001785 0.109048 0.351247 0.808238 1.000000

And let’s create our usual confusion matrix.

# confusion matrix
logit.predict <- logit.test.pred
logit.predict[logit.predict<0.5] <- 0
logit.predict[logit.predict>=0.5] <- 1
table(logit.predict, cs.test.df$win, dnn=c("Predicted", "Actual"))

##          Actual
## Predicted FALSE TRUE
##         0    77    4
##         1    10   33

TN: 77 TP: 33

(33+77)/(77+4+10+33)=.887

The matrix tells us that we are doing fairly well in predicting cases of False, and doing quite badly in predicting cases of True. To recall some of the measures we’d discussed earlier, the accuracy or hit rate is (TP + TN)/Total = (4100 + 505)/5742 = 0.80.

how many individuals in segment A got classified into segment A (true positives)
how many individuals not in segment A got classified into segment A (false positives)
how many individuals not in segment A got classified into other segments (true negatives)
how many individuals in segment A got classified into other segments (false negatives)

Use logit and probit when outcome variable is discrete (a choice), otherwise you can use a regression. Logit vs. probit doesn’t matter

Let’s turn to our next model, Decision Trees.

# logit on pre-release data only
logit.regr.prerelease <- glm(win ~ ReleaseTime+Genre+Production+Distribution+PriorPopular+Budget+Screens+Duration, 
                  data = cs.train.df, family = "binomial")
summary(logit.regr.prerelease)

## 
## Call:
## glm(formula = win ~ ReleaseTime + Genre + Production + Distribution + 
##     PriorPopular + Budget + Screens + Duration, family = "binomial", 
##     data = cs.train.df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5121  -0.8165  -0.5272   0.9572   2.7062  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -3.778e-01  1.574e+00  -0.240  0.81034    
## ReleaseTime     4.002e-01  3.682e-01   1.087  0.27710    
## GenreAdventure  3.668e-01  7.224e-01   0.508  0.61161    
## GenreComedy     7.930e-02  7.114e-01   0.111  0.91125    
## GenreDrama      3.766e-01  7.226e-01   0.521  0.60222    
## GenreHorror     1.205e+00  8.297e-01   1.453  0.14633    
## GenreRomance    7.240e-01  7.752e-01   0.934  0.35030    
## GenreThriller   6.161e-01  6.883e-01   0.895  0.37074    
## Production      4.025e-01  5.432e-01   0.741  0.45865    
## Distribution   -3.872e-01  4.932e-01  -0.785  0.43235    
## PriorPopular   -7.324e-02  3.980e-01  -0.184  0.85400    
## Budget         -1.724e-08  6.483e-09  -2.659  0.00783 ** 
## Screens         9.107e-04  2.269e-04   4.014 5.96e-05 ***
## Duration       -2.290e-02  1.493e-02  -1.534  0.12503    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 230.10  on 184  degrees of freedom
## Residual deviance: 192.05  on 171  degrees of freedom
## AIC: 220.05
## 
## Number of Fisher Scoring iterations: 5

# predict on the test data
logit.test.pred.prerelease <- predict(logit.regr.prerelease, cs.test.df, type = "response")
summary(logit.test.pred.prerelease)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.001489 0.129576 0.326735 0.332804 0.493108 0.819339

# confusion matrix
logit.predict.prerelease <- logit.test.pred.prerelease
logit.predict.prerelease[logit.predict.prerelease<0.5] <- 0
logit.predict.prerelease[logit.predict.prerelease>=0.5] <- 1
table(logit.predict.prerelease, cs.test.df$win, dnn=c("Predicted", "Actual"))

##          Actual
## Predicted FALSE TRUE
##         0    76   18
##         1    11   19

Decision Trees

As before, we will build a tree on the training data and then predict on the test data.

#decision trees all variables
simple.tree <- rpart(win ~ ReleaseTime+Genre+Production+Distribution+PriorPopular+Opening_Income+Budget+Screens+Duration+Budget+PublicRating+CriticRating+Votes+Reviews, data = cs.train.df, method =
   "class")

rpart.plot(simple.tree)

simple.predict <- as.matrix(predict(simple.tree, type="class", newdata = cs.test.df))
table(simple.predict, as.matrix(cs.test.df$win), dnn=c("Predicted", "Actual"))

##          Actual
## Predicted FALSE TRUE
##     FALSE    75    8
##     TRUE     12   29

(75+29)/(75+8+12+29)

Now, a major consideration for decision trees is deciding if one needs to prune the tree. There are various ways of doing this, and we use a particularly simple approach readily available in the package.

#pruning
printcp(simple.tree)

## 
## Classification tree:
## rpart(formula = win ~ ReleaseTime + Genre + Production + Distribution + 
##     PriorPopular + Opening_Income + Budget + Screens + Duration + 
##     Budget + PublicRating + CriticRating + Votes + Reviews, data = cs.train.df, 
##     method = "class")
## 
## Variables actually used in tree construction:
## [1] Budget         Opening_Income Reviews        Screens       
## 
## Root node error: 58/185 = 0.31351
## 
## n= 185 
## 
##         CP nsplit rel error  xerror     xstd
## 1 0.206897      0   1.00000 1.00000 0.108793
## 2 0.112069      2   0.58621 0.75862 0.099844
## 3 0.034483      4   0.36207 0.63793 0.093803
## 4 0.010000      5   0.32759 0.82759 0.102794

simple.prune <- prune(simple.tree, cp=simple.tree$cptable[which.min(simple.tree$cptable[,"xerror"]),"CP"])
rpart.plot(simple.prune)

This tells us that there was no merit to pruning the tree we already had, since the output is identical.

Let’s turn to our next model, Bagging.

Bagging

bag.cs=randomForest(win ~ ReleaseTime+Genre+Production+Distribution+PriorPopular+Opening_Income+Budget+Screens+Duration+Budget+PublicRating+CriticRating+Votes+Reviews, data = cs.train.df, mtry=12,
                    na.action=na.omit, importance=TRUE)
bag.cs

## 
## Call:
##  randomForest(formula = win ~ ReleaseTime + Genre + Production +      Distribution + PriorPopular + Opening_Income + Budget + Screens +      Duration + Budget + PublicRating + CriticRating + Votes +      Reviews, data = cs.train.df, mtry = 12, importance = TRUE,      na.action = na.omit) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 12
## 
##         OOB estimate of  error rate: 15.68%
## Confusion matrix:
##       FALSE TRUE class.error
## FALSE   118    9  0.07086614
## TRUE     20   38  0.34482759

Note that the above is for the training data. The pattern is clearly one of doing a bad job of the True.
We now predict on test data.

yhat.bag = predict(bag.cs,newdata=cs.test.df)
#check confusion matrix
library(caret)
confusionMatrix(yhat.bag, cs.test.df$win)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction FALSE TRUE
##      FALSE    84    6
##      TRUE      3   31
##                                           
##                Accuracy : 0.9274          
##                  95% CI : (0.8667, 0.9663)
##     No Information Rate : 0.7016          
##     P-Value [Acc > NIR] : 6.476e-10       
##                                           
##                   Kappa : 0.8225          
##                                           
##  Mcnemar's Test P-Value : 0.505           
##                                           
##             Sensitivity : 0.9655          
##             Specificity : 0.8378          
##          Pos Pred Value : 0.9333          
##          Neg Pred Value : 0.9118          
##              Prevalence : 0.7016          
##          Detection Rate : 0.6774          
##    Detection Prevalence : 0.7258          
##       Balanced Accuracy : 0.9017          
##                                           
##        'Positive' Class : FALSE           
##

It is often useful to look at how important each variable is in a bagging model. This is what the plot below shows.

#plot MeanDecreaseGini
varImpPlot(bag.cs,type=2)

The next model is Random Forests.

Random Forests

#random forest
rforest.tree <- randomForest(win ~ ReleaseTime+Genre+Production+Distribution+PriorPopular+Opening_Income+Budget+Screens+Duration+Budget+PublicRating+CriticRating+Votes+Reviews, data = cs.train.df, 
                       ntreeTry=100, mtry=4, na.action=na.omit, proximity=TRUE)

Predicting on test data.

rforest.predict <- predict(rforest.tree, newdata = cs.test.df)
table(rforest.predict, cs.test.df$win, dnn=c("Predicted", "Actual"))

##          Actual
## Predicted FALSE TRUE
##     FALSE    83    9
##     TRUE      4   28

Our next model is boosting.

Boosting

#boosted trees
boosted.tree <- C5.0(win ~ ReleaseTime+Genre+Production+Distribution+PriorPopular+Opening_Income+Budget+Screens+Duration+Budget+PublicRating+CriticRating+Votes+Reviews, data = cs.train.df, trial = 20)
plot(boosted.tree, trial = 4)

boost.predict <- predict(boosted.tree, newdata = cs.test.df)
table(boost.predict, cs.test.df$win, dnn=c("Predicted", "Actual"))

##          Actual
## Predicted FALSE TRUE
##     FALSE    83    4
##     TRUE      4   33

#boosted trees
boosted.tree2 <- C5.0(win ~ Budget+Screens+Duration, data = cs.train.df, trial = 20)
plot(boosted.tree2, trial = 4)

boost.predict2 <- predict(boosted.tree2, newdata = cs.test.df)
table(boost.predict2, cs.test.df$win, dnn=c("Predicted", "Actual"))

##          Actual
## Predicted FALSE TRUE
##     FALSE    84   23
##     TRUE      3   14

Comparing Models

We look at our familiar ROC curves to get a sense of how the various models do against each other. We should already have a sense of this, from the confusion matrices we’ve seen so far; all the models did a fairly good job of predicting the “0” correctly, and a fairly bad job of predicting the “1”, i.e., there were quite a few successful projects that were classified as unsuccessful.

## Logit
## ======
roc_logit <- rocit(score = logit.test.pred, 
                   class = cs.test.df$win, 
                   method = "emp")
auc_logit <- roc_logit$AUC
print(auc_logit)

## [1] 0.9493632

# Default plot
plot(roc_logit, values = F)

## Simple Decision Tree

# first get the predicted vector to be numerical
simple.predict.num <- simple.predict
simple.predict.num <- replace(simple.predict.num, simple.predict.num == "TRUE", 1)
simple.predict.num <- replace(simple.predict.num, simple.predict.num == "FALSE", 0)
simple.predict.num <- sapply(simple.predict.num, as.numeric)
roc_dectree <- rocit(score = simple.predict.num, 
                   class = cs.test.df$win, 
                   method = "emp")
auc_dectree <- roc_dectree$AUC
print(auc_dectree)

## [1] 0.8229264

# Default plot
plot(roc_dectree, values = F)

## Boosting

# first get the predicted vector to be numerical
boost.predict.num <- as.integer(boost.predict)
boost.predict.num <- replace(boost.predict.num, boost.predict.num == 1, 0)
boost.predict.num <- replace(boost.predict.num, boost.predict.num == 2, 1)
roc_boost <- rocit(score = boost.predict.num, 
                   class = cs.test.df$win, 
                   method = "emp")
auc_boost <- roc_boost$AUC
print(auc_boost)

## [1] 0.9229574

# Default plot
plot(roc_boost, values = F)

## Random Forest

# first get the predicted vector to be numerical
rforest.predict.num <- as.integer(rforest.predict)
rforest.predict.num <- replace(rforest.predict.num, rforest.predict.num == 1, 0)
rforest.predict.num <- replace(rforest.predict.num, rforest.predict.num == 2, 1)
roc_rf <- rocit(score = rforest.predict.num, 
                   class = cs.test.df$win, 
                   method = "emp")
auc_rf <- roc_rf$AUC
print(auc_rf)

## [1] 0.8553899

# Default plot
plot(roc_rf, values = F)

## Bagging

# first get the predicted vector to be numerical
yhat.bag.num <- as.integer(yhat.bag)
yhat.bag.num <- replace(yhat.bag.num, yhat.bag.num == 1, 0)
yhat.bag.num <- replace(yhat.bag.num, yhat.bag.num == 2, 1)
roc_bag <- rocit(score = yhat.bag.num, 
                   class = cs.test.df$win, 
                   method = "emp")
auc_bag <- roc_bag$AUC
print(auc_bag)

## [1] 0.9016775

# Default plot
plot(roc_bag, values = F)

```