# !diagnostics off

description <- structure(list(sku = structure(c(9L, 6L, 5L, 2L, 3L, 4L, 17L, 
18L, 19L, 20L, 8L, 14L, 13L, 12L, 11L, 7L, 1L, 10L, 15L, 21L, 
16L, 22L), .Label = c("deck_risk ", "forecast_3_month ", "forecast_6_month ", 
"forecast_9_month ", "in_transit_qty ", "lead_time ", "local_bo_qty ", 
"min_bank ", "national_inv ", "oe_constraint ", "perf_12_month_avg ", 
"perf_6_month_avg ", "pieces_past_due ", "potential_issue ", 
"ppap_risk ", "rev_stop ", "sales_1_month ", "sales_3_month ", 
"sales_6_month ", "sales_9_month ", "stop_auto_buy ", "went_on_backorder "
), class = "factor"), Random.ID.for.the.product = structure(c(3L, 
18L, 1L, 4L, 5L, 6L, 11L, 12L, 13L, 14L, 7L, 15L, 9L, 17L, 16L, 
2L, 8L, 8L, 8L, 8L, 8L, 10L), .Label = c(" Amount of product in transit from source", 
" Amount of stock orders overdue", " Current inventory level for the part", 
" Forecast sales for the next 3 months", " Forecast sales for the next 6 months", 
" Forecast sales for the next 9 months", " Minimum recommend amount to stock", 
" Part risk flag", " Parts overdue from source", " Product actually went on backorder. This is the target value.", 
" Sales quantity for the prior 1 month time period", " Sales quantity for the prior 3 month time period", 
" Sales quantity for the prior 6 month time period", " Sales quantity for the prior 9 month time period", 
" Source issue for part identified", " Source performance for prior 12 month period", 
" Source performance for prior 6 month period", " Transit time for product (if available)"
), class = "factor")), .Names = c("sku", "Random.ID.for.the.product"
), class = "data.frame", row.names = c(NA, -22L))

#sapply(all, function(x) sum(is.na(x)))
all[is.na(all)] = 0
all$lead_time %<>% as.numeric()
all <- as.data.frame(unclass(all))

all2 <- bind_rows(all %>% filter(went_on_backorder == "Yes") %>% sample_n(1000),
                  all %>% filter(went_on_backorder == "No") %>% sample_n(1000)) %>% tbl_df()

train <- all2 %>% filter(datatype == "train") %>% select(-datatype)
train <- bind_rows(train %>% filter(went_on_backorder == "Yes") %>% sample_n(100),
                   train %>% filter(went_on_backorder == "No") %>% sample_n(100)
                   ) %>% select(-sku)
test <- all2 %>% filter(datatype == "test") %>% select(-datatype)
test <- bind_rows(test %>% filter(went_on_backorder == "Yes") %>% sample_n(50),
                  test %>% filter(went_on_backorder == "No") %>% sample_n(50)
                  ) %>% select(-sku)

Descriptive Measures

#all_h2o <- cbind(weights, all[, c(which(colvars$variance > 50))])

all_h2o <- as.h2o(all2, destination_frame="prostate.hex")


  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%

h2o.describe(all_h2o) %>% # excluding the weights column
  gather(x, y, Zeros:Sigma) %>%
  mutate(group = ifelse(x %in% c("Min", "Max", "Mean"), "min, mean, max", 
                        ifelse(x %in% c("NegInf", "PosInf"), "Inf", "sigma, zeros"))) %>%
  # separating them into facets makes them easier to see %>%
  mutate(Label = factor(Label, levels = colnames(all_h2o[, -1]))) %>%
  ggplot2::ggplot(aes(x = Label %>% as.character() %>% to_title(), y = as.numeric(y), color = x))+
    geom_point(size = 3, alpha = 0.6) +
    scale_color_brewer(palette = "Set1") +
    my_theme() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
    facet_grid(group ~ ., scales = "free") +
    labs(x = "Feature",
         y = "Value",
         color = "Metrics: ",
         title = "Exoplanet: Descriptive Measures") +
    theme(legend.title = element_text(face = "bold")) +
    theme(plot.title = element_text(face="bold", hjust = 0.5)) +
    theme(legend.background = element_rect(size = 0.1), 
          legend.position = "right",
          legend.direction = "vertical",
          axis.text.x = element_text(size=8))

XGBoost and SVM Comparison

XGBT <- train(went_on_backorder ~ .,
                  data=train,
                  method="xgbTree",
                  preProcess = NULL,
                  tuneLength=5,
                  trControl = trainControl(method = "repeatedcv",
                                           number=5,
                                           repeats=5,
                                           verboseIter = FALSE,
                                           summaryFunction=multiClassSummary)
)

#Support vector machine with Linear Kernel model


SVM <- train(went_on_backorder ~ .,
            data=train,
            method="svmLinear",
            preProcess = NULL,
            tuneLength=5,
            trControl = trainControl(method = "repeatedcv",
                                     number=5,
                                     repeats=5,
                                     verboseIter = FALSE,
                                     summaryFunction=multiClassSummary)
)

Cross Validation Confusion Matricies

XGBT

confusionMatrix(XGBT,positive ="Yes")

Cross-Validated (5 fold, repeated 5 times) Confusion Matrix 

(entries are percentual average cell counts across resamples)
 
          Reference
Prediction   No  Yes
       No  41.2  8.8
       Yes  8.8 41.2
                           
 Accuracy (average) : 0.824

SVM

confusionMatrix(SVM,positive ="Yes")

Cross-Validated (5 fold, repeated 5 times) Confusion Matrix 

(entries are percentual average cell counts across resamples)
 
          Reference
Prediction   No  Yes
       No  36.9  9.0
       Yes 13.1 41.0
                           
 Accuracy (average) : 0.779

resXGB <- XGBT$resample %>% 
  separate(Resample,into = c("Fold", "Iter"), sep = ".Rep")

resXGB = dplyr::rename(resXGB,
                       ACC=Accuracy,
                       KAP=Kappa,
                       SEN=Sensitivity,
                       SPEC=Specificity,
                       PPV=Pos_Pred_Value,
                       NPV=Neg_Pred_Value,
                       BAC=Balanced_Accuracy) %>% 
                       .[,-7]
resXGB$Iteration=c(1:nrow(resXGB))
resXGB$model="XGB"

resSVM<-SVM$resample %>% 
  separate(Resample,into = c("Fold", "Iter"), sep = ".Rep")

resSVM = dplyr::rename(resSVM,
                       ACC=Accuracy,
                       KAP=Kappa,
                       SEN=Sensitivity,
                       SPEC=Specificity,
                       PPV=Pos_Pred_Value,
                       NPV=Neg_Pred_Value,
                       BAC=Balanced_Accuracy) %>% 
                        .[,-7]

resSVM$Iteration=c(1:nrow(resSVM))
resSVM$model="SVM"

resXGB=gather(resXGB,ACC:BAC,key="Metric",value="Value")
resSVM=gather(resSVM,ACC:BAC,key="Metric",value="Value")

resdf=rbind(resSVM,resXGB)

XGBT and SVM Comparison

Overall

grid.arrange(
resdf %>% 
ggplot(aes(x=Metric,y=Value,fill=model,color=model)) +
stat_summary(fun.ymin=min,fun.ymax=max,fun.y="median",shape=21,size=1) +
facet_grid(~model) +
coord_flip() +
scale_fill_brewer(palette = "Set1")  + 
  labs(title = "SVM & XGB Comparison: Line Chart") +
  theme(plot.title = element_text(face="bold", hjust = 0.5),
            legend.background = element_rect(size = 0.1), 
            legend.position = "right",
            legend.direction = "vertical",
            legend.title = element_text(face = "bold")),
resdf %>% 
  ggplot(aes(x=Metric,y=Value))+
  geom_boxplot(aes(fill=model),alpha=0.6)+
  coord_flip()+facet_wrap(~Metric,scales="free",ncol=2) +
  theme(axis.text.y=element_blank()) +
  scale_fill_brewer(palette = "Set1")+
  scale_color_brewer(palette = "Set1") +  
  labs(title = "SVM & XGB Comparison: Box Chart") +
  theme(plot.title = element_text(face="bold", hjust = 0.5),
            legend.background = element_rect(size = 0.1), 
            legend.position = "right",
            legend.direction = "vertical",
            legend.title = element_text(face = "bold"))
)

By Fold

grid.arrange(
resdf %>%
  ggplot(aes(x=Iteration,y=Value))+
  geom_path(aes(color=model),size=1,alpha=0.7)+
  facet_wrap(~Metric,scales="free",ncol=2)+
  scale_color_brewer(palette = "Set1") + 
  labs(title = "SVM & XGB Comparison") +
  theme(plot.title = element_text(face="bold", hjust = 0.5),
            legend.background = element_rect(size = 0.1), 
            legend.position = "right",
            legend.direction = "vertical",
            legend.title = element_text(face = "bold")),
resdf %>% 
  ggplot(aes(x=Iter,y=Value,group=model))+
  geom_line(aes(color=model),size=1,show.legend=F)+
  facet_grid(Fold~Metric,scales="free") +
  scale_color_brewer(palette = "Set1")  +
  labs(title = "Metrics by Fold") + 
  theme(plot.title = element_text(face="bold", hjust = 0.5),
            legend.background = element_rect(size = 0.1), 
            legend.position = "right",
            legend.direction = "vertical",
            legend.title = element_text(face = "bold"))
)

Test Confusion Matrix

Extreme Gradient Boosting: Train v. Test Confusion Matrix

predxgb <- predict(XGBT,test,type="prob") %>% cbind(test,.)
predxgb$Predicted=predict(XGBT,test)
predxgb$Model="XGBT"

predsvm <- test %>% mutate(.,Predicted=predict(SVM,.))
predsvm$Model="SVM"

confusionMatrix(predxgb$Predicted,
                reference=predxgb$went_on_backorder,
                positive ="Yes",
                mode="everything")

Confusion Matrix and Statistics

          Reference
Prediction No Yes
       No  42   6
       Yes  8  44
                                             
               Accuracy : 0.86               
                 95% CI : (0.7763, 0.9213)   
    No Information Rate : 0.5                
    P-Value [Acc > NIR] : 0.00000000000004142
                                             
                  Kappa : 0.72               
 Mcnemar's Test P-Value : 0.7893             
                                             
            Sensitivity : 0.8800             
            Specificity : 0.8400             
         Pos Pred Value : 0.8462             
         Neg Pred Value : 0.8750             
              Precision : 0.8462             
                 Recall : 0.8800             
                     F1 : 0.8627             
             Prevalence : 0.5000             
         Detection Rate : 0.4400             
   Detection Prevalence : 0.5200             
      Balanced Accuracy : 0.8600             
                                             
       'Positive' Class : Yes

XGBoost has an accuracy of 86%.

Support Vector Machines: Train v. Test Confusion Matrix

confusionMatrix(predsvm$Predicted,
                reference=predsvm$went_on_backorder,
                positive ="Yes",
                mode="everything")

Confusion Matrix and Statistics

          Reference
Prediction No Yes
       No  45   5
       Yes  5  45
                                             
               Accuracy : 0.9                
                 95% CI : (0.8238, 0.951)    
    No Information Rate : 0.5                
    P-Value [Acc > NIR] : <0.0000000000000002
                                             
                  Kappa : 0.8                
 Mcnemar's Test P-Value : 1                  
                                             
            Sensitivity : 0.90               
            Specificity : 0.90               
         Pos Pred Value : 0.90               
         Neg Pred Value : 0.90               
              Precision : 0.90               
                 Recall : 0.90               
                     F1 : 0.90               
             Prevalence : 0.50               
         Detection Rate : 0.45               
   Detection Prevalence : 0.50               
      Balanced Accuracy : 0.90               
                                             
       'Positive' Class : Yes

Support Vector Machines has an accuracy of 90%.

Feature Importance: XGBoost

features <- train %>% select(-went_on_backorder) %>%
  apply(.,2,function(x) as.numeric(as.character(x)))%>%
    as.matrix()%>%
      colnames()

xgboost::xgb.importance(features,XGBT$finalModel) %>%
  xgboost::xgb.ggplot.importance()

Pairs Prediction Graph

datalog <- all2 %>%
  select_if(is.numeric) %>% select(-sku) %>% log2(.) %>%
  mutate(.,Outcome=all2$went_on_backorder,
         PredXGBT=predict(XGBT,all2 %>% select(-sku)),
         PredSVM=predict(SVM,all2 %>% select(-sku))
         )

#colnames(datalog) = colnames(datalog) %>% to_title()

plotfuncLow <- function(data,mapping){
  p <- ggplot(data = data,mapping=mapping)+
    geom_point(aes(fill=datalog$PredXGBT),shape=21,color="black")+
    stat_density2d(geom="polygon",aes(fill=datalog$PredXGBT,alpha = ..level..))+
    scale_fill_manual(values=c("blue","red"))
  p
}

plotfuncUp <- function(data,mapping){
  p <- ggplot(data = data,mapping=mapping)+
    geom_point(aes(fill=datalog$PredSVM),shape=21,color="black")+
    stat_density2d(geom="polygon",aes(fill=datalog$PredSVM,alpha = ..level..))+
    scale_fill_manual(values=c("violet","gold"))
  p
}

library(GGally)

ggpairs(datalog,
        columns = 1:9,
        lower = list(continuous=plotfuncLow),
        diag=NULL,
        upper = list(continuous=plotfuncUp))

Decision Tree Methodology and Grid Tuning Exercise

all2 <- bind_rows(all %>% filter(went_on_backorder == "Yes") %>% sample_n(1000),
                  all %>% filter(went_on_backorder == "No") %>% sample_n(1000)) %>% tbl_df()

train <- all2 %>% filter(datatype == "train") %>% select(-datatype)
train <- bind_rows(train %>% filter(went_on_backorder == "Yes") %>% sample_n(300),
                   train %>% filter(went_on_backorder == "No") %>% sample_n(300)
                   ) %>% select(-sku)
test <- all2 %>% filter(datatype == "test") %>% select(-datatype)
test <- bind_rows(test %>% filter(went_on_backorder == "Yes") %>% sample_n(100),
                  test %>% filter(went_on_backorder == "No") %>% sample_n(100)
                  ) %>% select(-sku)

CP Tuning Based Decision Tree

Control=trainControl(method= "repeatedcv",
                     number=10,
                     repeats=10,
                     classProbs=TRUE,
                     sampling = "smote",
                     summaryFunction = twoClassSummary)

cart1=caret::train(went_on_backorder~.,
                   data=train,
                   method="rpart",
                   trControl=Control,
                   tuneLength=10)

cart1

CART 

600 samples
 21 predictor
  2 classes: 'No', 'Yes' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 10 times) 
Summary of sample sizes: 540, 540, 540, 540, 540, 540, ... 
Addtional sampling using SMOTE

Resampling results across tuning parameters:

  cp          ROC        Sens        Spec     
  0.00000000  0.8582333  0.77933333  0.8256667
  0.06222222  0.8315389  0.81233333  0.8326667
  0.12444444  0.7834500  0.76466667  0.8010000
  0.18666667  0.7766444  0.75733333  0.7943333
  0.24888889  0.7192944  0.61633333  0.8210000
  0.31111111  0.7116667  0.59700000  0.8263333
  0.37333333  0.7100000  0.58866667  0.8313333
  0.43555556  0.6961667  0.54266667  0.8496667
  0.49777778  0.5561667  0.15466667  0.9576667
  0.56000000  0.5048333  0.01233333  0.9973333

ROC was used to select the optimal model using  the largest value.
The final value used for the model was cp = 0.

library(partykit)
library(party)

fancyRpartPlot(cart1$finalModel,palettes="RdPu", sub="", main="Method:CP Tuning Based Decision Tree")

confusionMatrix(cart1,positive ="Yes")

Cross-Validated (10 fold, repeated 10 times) Confusion Matrix 

(entries are percentual average cell counts across resamples)
 
          Reference
Prediction   No  Yes
       No  39.0  8.7
       Yes 11.0 41.3
                            
 Accuracy (average) : 0.8025

pred1 <- predict(cart1,
                test ,
                type="prob") %>% cbind(test,.)
pred1$Predicted=predict(cart1,test)

confusionMatrix(pred1$Predicted,
                reference=pred1$went_on_backorder,
                positive ="Yes",
                mode="everything")

Confusion Matrix and Statistics

          Reference
Prediction No Yes
       No  83  21
       Yes 17  79
                                             
               Accuracy : 0.81               
                 95% CI : (0.7487, 0.8619)   
    No Information Rate : 0.5                
    P-Value [Acc > NIR] : <0.0000000000000002
                                             
                  Kappa : 0.62               
 Mcnemar's Test P-Value : 0.6265             
                                             
            Sensitivity : 0.7900             
            Specificity : 0.8300             
         Pos Pred Value : 0.8229             
         Neg Pred Value : 0.7981             
              Precision : 0.8229             
                 Recall : 0.7900             
                     F1 : 0.8061             
             Prevalence : 0.5000             
         Detection Rate : 0.3950             
   Detection Prevalence : 0.4800             
      Balanced Accuracy : 0.8100             
                                             
       'Positive' Class : Yes

rpart_results_comparison <- data.frame(c("CP Based", "Max Tree Based", "Refined"))
rpart_results_comparison$Accuracy = 0
colnames(rpart_results_comparison)[1] <- "rPart_Method"
rpart_results_comparison$Accuracy[1] <- confusionMatrix(pred1$Predicted,
                reference=pred1$went_on_backorder,
                positive ="Yes",
                mode="everything")$overall[[1]] %>% as.numeric()

CP Tuning Based Tree has an accuracy of 0.81.

Max Tree Depth Based Decision Tree

cart2=caret::train(went_on_backorder~.,
                   data=train,
                   method = "rpart2",
                   trControl=Control,
                   tuneLength=10)

note: only 6 possible values of the max tree depth from the initial fit.
 Truncating the grid to 6 .

cart2

CART 

600 samples
 21 predictor
  2 classes: 'No', 'Yes' 

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 10 times) 
Summary of sample sizes: 540, 540, 540, 540, 540, 540, ... 
Addtional sampling using SMOTE

Resampling results across tuning parameters:

  maxdepth  ROC        Sens       Spec     
   1        0.7038333  0.5866667  0.8210000
   2        0.8185111  0.8100000  0.8206667
   4        0.8501833  0.8120000  0.8476667
  10        0.8590111  0.7953333  0.8590000
  12        0.8596611  0.7950000  0.8616667
  15        0.8596611  0.7950000  0.8616667

ROC was used to select the optimal model using  the largest value.
The final value used for the model was maxdepth = 12.

fancyRpartPlot(cart2$finalModel,palettes="RdPu", main="Method: Max Tree Depth Based Decision Tree", sub="")

confusionMatrix(cart2,positive ="Yes")

Cross-Validated (10 fold, repeated 10 times) Confusion Matrix 

(entries are percentual average cell counts across resamples)
 
          Reference
Prediction   No  Yes
       No  39.8  6.9
       Yes 10.2 43.1
                            
 Accuracy (average) : 0.8283

pred2 <- predict(cart2,
                test,
                type="prob") %>% cbind(test,.)
pred2$Predicted=predict(cart2,test)

confusionMatrix(pred2$Predicted,
                reference=pred2$went_on_backorder,
                positive ="Yes",
                mode="everything")

Confusion Matrix and Statistics

          Reference
Prediction No Yes
       No  79  14
       Yes 21  86
                                             
               Accuracy : 0.825              
                 95% CI : (0.7651, 0.875)    
    No Information Rate : 0.5                
    P-Value [Acc > NIR] : <0.0000000000000002
                                             
                  Kappa : 0.65               
 Mcnemar's Test P-Value : 0.3105             
                                             
            Sensitivity : 0.8600             
            Specificity : 0.7900             
         Pos Pred Value : 0.8037             
         Neg Pred Value : 0.8495             
              Precision : 0.8037             
                 Recall : 0.8600             
                     F1 : 0.8309             
             Prevalence : 0.5000             
         Detection Rate : 0.4300             
   Detection Prevalence : 0.5350             
      Balanced Accuracy : 0.8250             
                                             
       'Positive' Class : Yes

rpart_results_comparison$Accuracy[2] <- confusionMatrix(pred2$Predicted,
                reference=pred2$went_on_backorder,
                positive ="Yes",
                mode="everything")$overall[[1]] %>% as.numeric()

Max Tree Based Tuning has an accuracy of 0.825.

Grid Tuning Procedure

task4f=makeClassifTask(id="backorder",data=train,target="went_on_backorder",positive = "Yes")

tasktest=makeClassifTask(id="backorder",data=test,target="went_on_backorder",positive = "Yes")

learner = makeLearner("classif.rpart", predict.type = "prob")

Parameters Set

learner$par.set

                   Type len  Def   Constr Req Tunable Trafo
minsplit        integer   -   20 1 to Inf   -    TRUE     -
minbucket       integer   -    - 1 to Inf   -    TRUE     -
cp              numeric   - 0.01   0 to 1   -    TRUE     -
maxcompete      integer   -    4 0 to Inf   -    TRUE     -
maxsurrogate    integer   -    5 0 to Inf   -    TRUE     -
usesurrogate   discrete   -    2    0,1,2   -    TRUE     -
surrogatestyle discrete   -    0      0,1   -    TRUE     -
maxdepth        integer   -   30  1 to 30   -    TRUE     -
xval            integer   -   10 0 to Inf   -   FALSE     -
parms           untyped   -    -        -   -    TRUE     -

Parameter Tuning

Optimal combination of CP and MaxtreeDepth will be choosen for our algorithm.

ps=makeParamSet(makeDiscreteParam("maxdepth",values = c(1,2,3,4,5,6,7)),makeNumericParam("cp",lower=0.01,upper=0.1))

ctrlgrid = makeTuneControlGrid()

rdesc = makeResampleDesc("RepCV",reps = 10,folds=10)


res=tuneParams(learner,
               task=task4f,
               resampling=rdesc,
               par.set=ps,
               control=ctrlgrid,
               measures = list(mmce,bac))

res$x

$maxdepth
[1] 6

$cp
[1] 0.07

The optimal tuning is a max depth of 6 and a cp of 0.07.

Mean Misclassification Error Grid Tuning Results

resdf=generateHyperParsEffectData(res)

resdata=resdf$data%>%tbl_df()

resdata %>%
  ggplot(aes(x=cp,y=maxdepth))+
  geom_point(aes(size=mmce.test.mean,fill=mmce.test.mean),alpha=0.6,shape=21)+
  geom_vline(xintercept=res$x$cp,color="red",size=0.7)+
  geom_hline(yintercept=res$x$maxdepth,color="red",size=0.7)+
  scale_fill_gradient(high="purple",low="#ff0033") + 
  theme(plot.title = element_text(face="bold", hjust = 0.5),
            legend.background = element_rect(size = 0.1), 
            legend.position = "right",
            legend.direction = "vertical",
            legend.title = element_text(face = "bold")) +
  labs(title = "Mean Misclassification Error (MMCE) Grid Tuning")

Balance Accuracy Grid Tuning

resdata %>%
  ggplot(aes(x=cp,y=maxdepth))+
  geom_point(aes(size=bac.test.mean,fill=bac.test.mean),alpha=0.6,shape=21)+
  geom_vline(xintercept=res$x$cp,color="blue",size=0.7)+
  geom_hline(yintercept=res$x$maxdepth,color="blue",size=0.7)+
  scale_fill_gradient(low="purple",high="#ff0033") + 
  theme(plot.title = element_text(face="bold", hjust = 0.5),
            legend.background = element_rect(size = 0.1), 
            legend.position = "right",
            legend.direction = "vertical",
            legend.title = element_text(face = "bold")) +
  labs(title = "Balance Accuracy (BAC) Grid Tuning")

Re-Adjusting With New Refined Parameters

learner2=setHyperPars(learner,par.vals = res$x)

cartmlr=mlr::train(learner2,task4f)
predmlr=predict(cartmlr,tasktest)

mets=list(auc,bac,tpr,tnr,mmce,ber,fpr,fnr)

performance(predmlr, measures =mets)

    auc     bac     tpr     tnr    mmce     ber     fpr     fnr 
0.87095 0.87500 0.84000 0.91000 0.12500 0.12500 0.09000 0.16000

truth=predmlr$data$truth

confusionMatrix(predmlr$data$response,
                reference=predmlr$data$truth,
                positive ="Yes",
                mode="everything")

Confusion Matrix and Statistics

          Reference
Prediction No Yes
       No  91  16
       Yes  9  84
                                             
               Accuracy : 0.875              
                 95% CI : (0.821, 0.9174)    
    No Information Rate : 0.5                
    P-Value [Acc > NIR] : <0.0000000000000002
                                             
                  Kappa : 0.75               
 Mcnemar's Test P-Value : 0.2301             
                                             
            Sensitivity : 0.8400             
            Specificity : 0.9100             
         Pos Pred Value : 0.9032             
         Neg Pred Value : 0.8505             
              Precision : 0.9032             
                 Recall : 0.8400             
                     F1 : 0.8705             
             Prevalence : 0.5000             
         Detection Rate : 0.4200             
   Detection Prevalence : 0.4650             
      Balanced Accuracy : 0.8750             
                                             
       'Positive' Class : Yes

rpart_results_comparison$Accuracy[3] <- confusionMatrix(predmlr$data$response,
                reference=predmlr$data$truth,
                positive ="Yes",
                mode="everything")$overall[[1]]

fancyRpartPlot(cartmlr$learner.model,palettes="RdPu", sub="", main = "Refined Grid Tuning Decision Tree")

rPart Tuning Results

rpart_results_comparison %<>% arrange(desc(Accuracy)) 
time_calib = (proc.time() - start_time)

Our Refined Tuning has the best result with 5% higher accuracy than next method.

colnames(rpart_results_comparison)[1] <- "rPart Method"
rpart_results_comparison

    rPart Method Accuracy
1        Refined    0.875
2 Max Tree Based    0.825
3       CP Based    0.810

This rMarkdown script took 14 minutes to run.

Backorders

Benjamin Lott

April 10, 2017