This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

library(caret)

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

set.seed(2969)
imbal_train <- twoClassSim(10000, intercept = -20, linearVars = 20)
imbal_test  <- twoClassSim(10000, intercept = -20, linearVars = 20)
table(imbal_train$Class)
set.seed(9560)
down_train <- downSample(x = imbal_train[, -ncol(imbal_train)],
                         y = imbal_train$Class)
table(down_train$Class) 
set.seed(9560)
up_train <- upSample(x = imbal_train[, -ncol(imbal_train)],
                     y = imbal_train$Class)                         
table(up_train$Class) 
library(DMwR)

set.seed(9560)
smote_train <- SMOTE(Class ~ ., data  = imbal_train)                         
table(smote_train$Class) 
library(ROSE)

set.seed(9560)
rose_train <- ROSE(Class ~ ., data  = imbal_train)$data                         
table(rose_train$Class) 
ctrl <- trainControl(method = "repeatedcv", repeats = 5,
                     classProbs = TRUE,
                     summaryFunction = twoClassSummary)

set.seed(5627)
orig_fit <- train(Class ~ ., data = imbal_train, 
                  method = "treebag",
                  nbagg = 50,
                  metric = "ROC",
                  trControl = ctrl)

set.seed(5627)
down_outside <- train(Class ~ ., data = down_train, 
                      method = "treebag",
                      nbagg = 50,
                      metric = "ROC",
                      trControl = ctrl)

set.seed(5627)
up_outside <- train(Class ~ ., data = up_train, 
                    method = "treebag",
                    nbagg = 50,
                    metric = "ROC",
                    trControl = ctrl)

set.seed(5627)
rose_outside <- train(Class ~ ., data = rose_train, 
                      method = "treebag",
                      nbagg = 50,
                      metric = "ROC",
                      trControl = ctrl)


set.seed(5627)
smote_outside <- train(Class ~ ., data = smote_train, 
                       method = "treebag",
                       nbagg = 50,
                       metric = "ROC",
                       trControl = ctrl)
outside_models <- list(original = orig_fit,
                       down = down_outside,
                       up = up_outside,
                       SMOTE = smote_outside,
                       ROSE = rose_outside)

outside_resampling <- resamples(outside_models)

test_roc <- function(model, data) {
  library(pROC)
  roc_obj <- roc(data$Class, 
                 predict(model, data, type = "prob")[, "Class1"],
                 levels = c("Class2", "Class1"))
  ci(roc_obj)
  }

outside_test <- lapply(outside_models, test_roc, data = imbal_test)
outside_test <- lapply(outside_test, as.vector)
outside_test <- do.call("rbind", outside_test)
colnames(outside_test) <- c("lower", "ROC", "upper")
outside_test <- as.data.frame(outside_test)

summary(outside_resampling, metric = "ROC")
outside_test
ctrl <- trainControl(method = "repeatedcv", repeats = 5,
                     classProbs = TRUE,
                     summaryFunction = twoClassSummary,
                     ## new option here:
                     sampling = "down")

set.seed(5627)
down_inside <- train(Class ~ ., data = imbal_train,
                     method = "treebag",
                     nbagg = 50,
                     metric = "ROC",
                     trControl = ctrl)

## now just change that option
ctrl$sampling <- "up"

set.seed(5627)
up_inside <- train(Class ~ ., data = imbal_train,
                   method = "treebag",
                   nbagg = 50,
                   metric = "ROC",
                   trControl = ctrl)

ctrl$sampling <- "rose"

set.seed(5627)
rose_inside <- train(Class ~ ., data = imbal_train,
                     method = "treebag",
                     nbagg = 50,
                     metric = "ROC",
                     trControl = ctrl)

ctrl$sampling <- "smote"

set.seed(5627)
smote_inside <- train(Class ~ ., data = imbal_train,
                      method = "treebag",
                      nbagg = 50,
                      metric = "ROC",
                      trControl = ctrl)
inside_models <- list(original = orig_fit,
                      down = down_inside,
                      up = up_inside,
                      SMOTE = smote_inside,
                      ROSE = rose_inside)

inside_resampling <- resamples(inside_models)

inside_test <- lapply(inside_models, test_roc, data = imbal_test)
inside_test <- lapply(inside_test, as.vector)
inside_test <- do.call("rbind", inside_test)
colnames(inside_test) <- c("lower", "ROC", "upper")
inside_test <- as.data.frame(inside_test)

summary(inside_resampling, metric = "ROC")
inside_test
smotest <- list(name = "SMOTE with more neighbors!",
                func = function (x, y) {
                  library(DMwR)
                  dat <- if (is.data.frame(x)) x else as.data.frame(x)
                  dat$.y <- y
                  dat <- SMOTE(.y ~ ., data = dat, k = 10)
                  list(x = dat[, !grepl(".y", colnames(dat), fixed = TRUE)], 
                       y = dat$.y)
                  },
                first = TRUE)
ctrl <- trainControl(method = "repeatedcv", repeats = 5,
                     classProbs = TRUE,
                     summaryFunction = twoClassSummary,
                     sampling = smotest)
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2sgXyBTdWJzYW1wbGluZyBGb3IgQ2xhc3MgSW1iYWxhbmNlcyINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNClRoaXMgaXMgYW4gW1IgTWFya2Rvd25dKGh0dHA6Ly9ybWFya2Rvd24ucnN0dWRpby5jb20pIE5vdGVib29rLiBXaGVuIHlvdSBleGVjdXRlIGNvZGUgd2l0aGluIHRoZSBub3RlYm9vaywgdGhlIHJlc3VsdHMgYXBwZWFyIGJlbmVhdGggdGhlIGNvZGUuIA0KDQpUcnkgZXhlY3V0aW5nIHRoaXMgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpSdW4qIGJ1dHRvbiB3aXRoaW4gdGhlIGNodW5rIG9yIGJ5IHBsYWNpbmcgeW91ciBjdXJzb3IgaW5zaWRlIGl0IGFuZCBwcmVzc2luZyAqQ3RybCtTaGlmdCtFbnRlciouIA0KDQpgYGB7cn0NCmxpYnJhcnkoY2FyZXQpDQpgYGANCg0KQWRkIGEgbmV3IGNodW5rIGJ5IGNsaWNraW5nIHRoZSAqSW5zZXJ0IENodW5rKiBidXR0b24gb24gdGhlIHRvb2xiYXIgb3IgYnkgcHJlc3NpbmcgKkN0cmwrQWx0K0kqLg0KDQpXaGVuIHlvdSBzYXZlIHRoZSBub3RlYm9vaywgYW4gSFRNTCBmaWxlIGNvbnRhaW5pbmcgdGhlIGNvZGUgYW5kIG91dHB1dCB3aWxsIGJlIHNhdmVkIGFsb25nc2lkZSBpdCAoY2xpY2sgdGhlICpQcmV2aWV3KiBidXR0b24gb3IgcHJlc3MgKkN0cmwrU2hpZnQrSyogdG8gcHJldmlldyB0aGUgSFRNTCBmaWxlKS4NCg0KYGBge3J9DQpzZXQuc2VlZCgyOTY5KQ0KaW1iYWxfdHJhaW4gPC0gdHdvQ2xhc3NTaW0oMTAwMDAsIGludGVyY2VwdCA9IC0yMCwgbGluZWFyVmFycyA9IDIwKQ0KaW1iYWxfdGVzdCAgPC0gdHdvQ2xhc3NTaW0oMTAwMDAsIGludGVyY2VwdCA9IC0yMCwgbGluZWFyVmFycyA9IDIwKQ0KdGFibGUoaW1iYWxfdHJhaW4kQ2xhc3MpDQpgYGANCg0KYGBge3IgZG93blNhbXBsZX0NCnNldC5zZWVkKDk1NjApDQpkb3duX3RyYWluIDwtIGRvd25TYW1wbGUoeCA9IGltYmFsX3RyYWluWywgLW5jb2woaW1iYWxfdHJhaW4pXSwNCiAgICAgICAgICAgICAgICAgICAgICAgICB5ID0gaW1iYWxfdHJhaW4kQ2xhc3MpDQp0YWJsZShkb3duX3RyYWluJENsYXNzKSANCmBgYA0KDQpgYGB7ciB1cFNhbXBsZX0NCnNldC5zZWVkKDk1NjApDQp1cF90cmFpbiA8LSB1cFNhbXBsZSh4ID0gaW1iYWxfdHJhaW5bLCAtbmNvbChpbWJhbF90cmFpbildLA0KICAgICAgICAgICAgICAgICAgICAgeSA9IGltYmFsX3RyYWluJENsYXNzKSAgICAgICAgICAgICAgICAgICAgICAgICANCnRhYmxlKHVwX3RyYWluJENsYXNzKSANCmBgYA0KDQpgYGB7ciBTTU9URX0NCmxpYnJhcnkoRE13UikNCg0Kc2V0LnNlZWQoOTU2MCkNCnNtb3RlX3RyYWluIDwtIFNNT1RFKENsYXNzIH4gLiwgZGF0YSAgPSBpbWJhbF90cmFpbikgICAgICAgICAgICAgICAgICAgICAgICAgDQp0YWJsZShzbW90ZV90cmFpbiRDbGFzcykgDQpgYGANCg0KYGBge3IgUk9TRX0NCmxpYnJhcnkoUk9TRSkNCg0Kc2V0LnNlZWQoOTU2MCkNCnJvc2VfdHJhaW4gPC0gUk9TRShDbGFzcyB+IC4sIGRhdGEgID0gaW1iYWxfdHJhaW4pJGRhdGEgICAgICAgICAgICAgICAgICAgICAgICAgDQp0YWJsZShyb3NlX3RyYWluJENsYXNzKSANCmBgYA0KDQpgYGB7ciBtb2RlbCBmaXR9DQpjdHJsIDwtIHRyYWluQ29udHJvbChtZXRob2QgPSAicmVwZWF0ZWRjdiIsIHJlcGVhdHMgPSA1LA0KICAgICAgICAgICAgICAgICAgICAgY2xhc3NQcm9icyA9IFRSVUUsDQogICAgICAgICAgICAgICAgICAgICBzdW1tYXJ5RnVuY3Rpb24gPSB0d29DbGFzc1N1bW1hcnkpDQoNCnNldC5zZWVkKDU2MjcpDQpvcmlnX2ZpdCA8LSB0cmFpbihDbGFzcyB+IC4sIGRhdGEgPSBpbWJhbF90cmFpbiwgDQogICAgICAgICAgICAgICAgICBtZXRob2QgPSAidHJlZWJhZyIsDQogICAgICAgICAgICAgICAgICBuYmFnZyA9IDUwLA0KICAgICAgICAgICAgICAgICAgbWV0cmljID0gIlJPQyIsDQogICAgICAgICAgICAgICAgICB0ckNvbnRyb2wgPSBjdHJsKQ0KDQpzZXQuc2VlZCg1NjI3KQ0KZG93bl9vdXRzaWRlIDwtIHRyYWluKENsYXNzIH4gLiwgZGF0YSA9IGRvd25fdHJhaW4sIA0KICAgICAgICAgICAgICAgICAgICAgIG1ldGhvZCA9ICJ0cmVlYmFnIiwNCiAgICAgICAgICAgICAgICAgICAgICBuYmFnZyA9IDUwLA0KICAgICAgICAgICAgICAgICAgICAgIG1ldHJpYyA9ICJST0MiLA0KICAgICAgICAgICAgICAgICAgICAgIHRyQ29udHJvbCA9IGN0cmwpDQoNCnNldC5zZWVkKDU2MjcpDQp1cF9vdXRzaWRlIDwtIHRyYWluKENsYXNzIH4gLiwgZGF0YSA9IHVwX3RyYWluLCANCiAgICAgICAgICAgICAgICAgICAgbWV0aG9kID0gInRyZWViYWciLA0KICAgICAgICAgICAgICAgICAgICBuYmFnZyA9IDUwLA0KICAgICAgICAgICAgICAgICAgICBtZXRyaWMgPSAiUk9DIiwNCiAgICAgICAgICAgICAgICAgICAgdHJDb250cm9sID0gY3RybCkNCg0Kc2V0LnNlZWQoNTYyNykNCnJvc2Vfb3V0c2lkZSA8LSB0cmFpbihDbGFzcyB+IC4sIGRhdGEgPSByb3NlX3RyYWluLCANCiAgICAgICAgICAgICAgICAgICAgICBtZXRob2QgPSAidHJlZWJhZyIsDQogICAgICAgICAgICAgICAgICAgICAgbmJhZ2cgPSA1MCwNCiAgICAgICAgICAgICAgICAgICAgICBtZXRyaWMgPSAiUk9DIiwNCiAgICAgICAgICAgICAgICAgICAgICB0ckNvbnRyb2wgPSBjdHJsKQ0KDQoNCnNldC5zZWVkKDU2MjcpDQpzbW90ZV9vdXRzaWRlIDwtIHRyYWluKENsYXNzIH4gLiwgZGF0YSA9IHNtb3RlX3RyYWluLCANCiAgICAgICAgICAgICAgICAgICAgICAgbWV0aG9kID0gInRyZWViYWciLA0KICAgICAgICAgICAgICAgICAgICAgICBuYmFnZyA9IDUwLA0KICAgICAgICAgICAgICAgICAgICAgICBtZXRyaWMgPSAiUk9DIiwNCiAgICAgICAgICAgICAgICAgICAgICAgdHJDb250cm9sID0gY3RybCkNCmBgYA0KDQpgYGB7cn0NCm91dHNpZGVfbW9kZWxzIDwtIGxpc3Qob3JpZ2luYWwgPSBvcmlnX2ZpdCwNCiAgICAgICAgICAgICAgICAgICAgICAgZG93biA9IGRvd25fb3V0c2lkZSwNCiAgICAgICAgICAgICAgICAgICAgICAgdXAgPSB1cF9vdXRzaWRlLA0KICAgICAgICAgICAgICAgICAgICAgICBTTU9URSA9IHNtb3RlX291dHNpZGUsDQogICAgICAgICAgICAgICAgICAgICAgIFJPU0UgPSByb3NlX291dHNpZGUpDQoNCm91dHNpZGVfcmVzYW1wbGluZyA8LSByZXNhbXBsZXMob3V0c2lkZV9tb2RlbHMpDQoNCnRlc3Rfcm9jIDwtIGZ1bmN0aW9uKG1vZGVsLCBkYXRhKSB7DQogIGxpYnJhcnkocFJPQykNCiAgcm9jX29iaiA8LSByb2MoZGF0YSRDbGFzcywgDQogICAgICAgICAgICAgICAgIHByZWRpY3QobW9kZWwsIGRhdGEsIHR5cGUgPSAicHJvYiIpWywgIkNsYXNzMSJdLA0KICAgICAgICAgICAgICAgICBsZXZlbHMgPSBjKCJDbGFzczIiLCAiQ2xhc3MxIikpDQogIGNpKHJvY19vYmopDQogIH0NCg0Kb3V0c2lkZV90ZXN0IDwtIGxhcHBseShvdXRzaWRlX21vZGVscywgdGVzdF9yb2MsIGRhdGEgPSBpbWJhbF90ZXN0KQ0Kb3V0c2lkZV90ZXN0IDwtIGxhcHBseShvdXRzaWRlX3Rlc3QsIGFzLnZlY3RvcikNCm91dHNpZGVfdGVzdCA8LSBkby5jYWxsKCJyYmluZCIsIG91dHNpZGVfdGVzdCkNCmNvbG5hbWVzKG91dHNpZGVfdGVzdCkgPC0gYygibG93ZXIiLCAiUk9DIiwgInVwcGVyIikNCm91dHNpZGVfdGVzdCA8LSBhcy5kYXRhLmZyYW1lKG91dHNpZGVfdGVzdCkNCg0Kc3VtbWFyeShvdXRzaWRlX3Jlc2FtcGxpbmcsIG1ldHJpYyA9ICJST0MiKQ0Kb3V0c2lkZV90ZXN0DQpgYGANCg0KYGBge3IgU3Vic2FtcGxpbmcgRHVyaW5nIFJlc2FtcGxpbmd9DQpjdHJsIDwtIHRyYWluQ29udHJvbChtZXRob2QgPSAicmVwZWF0ZWRjdiIsIHJlcGVhdHMgPSA1LA0KICAgICAgICAgICAgICAgICAgICAgY2xhc3NQcm9icyA9IFRSVUUsDQogICAgICAgICAgICAgICAgICAgICBzdW1tYXJ5RnVuY3Rpb24gPSB0d29DbGFzc1N1bW1hcnksDQogICAgICAgICAgICAgICAgICAgICAjIyBuZXcgb3B0aW9uIGhlcmU6DQogICAgICAgICAgICAgICAgICAgICBzYW1wbGluZyA9ICJkb3duIikNCg0Kc2V0LnNlZWQoNTYyNykNCmRvd25faW5zaWRlIDwtIHRyYWluKENsYXNzIH4gLiwgZGF0YSA9IGltYmFsX3RyYWluLA0KICAgICAgICAgICAgICAgICAgICAgbWV0aG9kID0gInRyZWViYWciLA0KICAgICAgICAgICAgICAgICAgICAgbmJhZ2cgPSA1MCwNCiAgICAgICAgICAgICAgICAgICAgIG1ldHJpYyA9ICJST0MiLA0KICAgICAgICAgICAgICAgICAgICAgdHJDb250cm9sID0gY3RybCkNCg0KIyMgbm93IGp1c3QgY2hhbmdlIHRoYXQgb3B0aW9uDQpjdHJsJHNhbXBsaW5nIDwtICJ1cCINCg0Kc2V0LnNlZWQoNTYyNykNCnVwX2luc2lkZSA8LSB0cmFpbihDbGFzcyB+IC4sIGRhdGEgPSBpbWJhbF90cmFpbiwNCiAgICAgICAgICAgICAgICAgICBtZXRob2QgPSAidHJlZWJhZyIsDQogICAgICAgICAgICAgICAgICAgbmJhZ2cgPSA1MCwNCiAgICAgICAgICAgICAgICAgICBtZXRyaWMgPSAiUk9DIiwNCiAgICAgICAgICAgICAgICAgICB0ckNvbnRyb2wgPSBjdHJsKQ0KDQpjdHJsJHNhbXBsaW5nIDwtICJyb3NlIg0KDQpzZXQuc2VlZCg1NjI3KQ0Kcm9zZV9pbnNpZGUgPC0gdHJhaW4oQ2xhc3MgfiAuLCBkYXRhID0gaW1iYWxfdHJhaW4sDQogICAgICAgICAgICAgICAgICAgICBtZXRob2QgPSAidHJlZWJhZyIsDQogICAgICAgICAgICAgICAgICAgICBuYmFnZyA9IDUwLA0KICAgICAgICAgICAgICAgICAgICAgbWV0cmljID0gIlJPQyIsDQogICAgICAgICAgICAgICAgICAgICB0ckNvbnRyb2wgPSBjdHJsKQ0KDQpjdHJsJHNhbXBsaW5nIDwtICJzbW90ZSINCg0Kc2V0LnNlZWQoNTYyNykNCnNtb3RlX2luc2lkZSA8LSB0cmFpbihDbGFzcyB+IC4sIGRhdGEgPSBpbWJhbF90cmFpbiwNCiAgICAgICAgICAgICAgICAgICAgICBtZXRob2QgPSAidHJlZWJhZyIsDQogICAgICAgICAgICAgICAgICAgICAgbmJhZ2cgPSA1MCwNCiAgICAgICAgICAgICAgICAgICAgICBtZXRyaWMgPSAiUk9DIiwNCiAgICAgICAgICAgICAgICAgICAgICB0ckNvbnRyb2wgPSBjdHJsKQ0KYGBgDQoNCmBgYHtyfQ0KaW5zaWRlX21vZGVscyA8LSBsaXN0KG9yaWdpbmFsID0gb3JpZ19maXQsDQogICAgICAgICAgICAgICAgICAgICAgZG93biA9IGRvd25faW5zaWRlLA0KICAgICAgICAgICAgICAgICAgICAgIHVwID0gdXBfaW5zaWRlLA0KICAgICAgICAgICAgICAgICAgICAgIFNNT1RFID0gc21vdGVfaW5zaWRlLA0KICAgICAgICAgICAgICAgICAgICAgIFJPU0UgPSByb3NlX2luc2lkZSkNCg0KaW5zaWRlX3Jlc2FtcGxpbmcgPC0gcmVzYW1wbGVzKGluc2lkZV9tb2RlbHMpDQoNCmluc2lkZV90ZXN0IDwtIGxhcHBseShpbnNpZGVfbW9kZWxzLCB0ZXN0X3JvYywgZGF0YSA9IGltYmFsX3Rlc3QpDQppbnNpZGVfdGVzdCA8LSBsYXBwbHkoaW5zaWRlX3Rlc3QsIGFzLnZlY3RvcikNCmluc2lkZV90ZXN0IDwtIGRvLmNhbGwoInJiaW5kIiwgaW5zaWRlX3Rlc3QpDQpjb2xuYW1lcyhpbnNpZGVfdGVzdCkgPC0gYygibG93ZXIiLCAiUk9DIiwgInVwcGVyIikNCmluc2lkZV90ZXN0IDwtIGFzLmRhdGEuZnJhbWUoaW5zaWRlX3Rlc3QpDQoNCnN1bW1hcnkoaW5zaWRlX3Jlc2FtcGxpbmcsIG1ldHJpYyA9ICJST0MiKQ0KaW5zaWRlX3Rlc3QNCmBgYA0KDQpgYGB7ciBXUkFQIFNNT1RFfQ0Kc21vdGVzdCA8LSBsaXN0KG5hbWUgPSAiU01PVEUgd2l0aCBtb3JlIG5laWdoYm9ycyEiLA0KICAgICAgICAgICAgICAgIGZ1bmMgPSBmdW5jdGlvbiAoeCwgeSkgew0KICAgICAgICAgICAgICAgICAgbGlicmFyeShETXdSKQ0KICAgICAgICAgICAgICAgICAgZGF0IDwtIGlmIChpcy5kYXRhLmZyYW1lKHgpKSB4IGVsc2UgYXMuZGF0YS5mcmFtZSh4KQ0KICAgICAgICAgICAgICAgICAgZGF0JC55IDwtIHkNCiAgICAgICAgICAgICAgICAgIGRhdCA8LSBTTU9URSgueSB+IC4sIGRhdGEgPSBkYXQsIGsgPSAxMCkNCiAgICAgICAgICAgICAgICAgIGxpc3QoeCA9IGRhdFssICFncmVwbCgiLnkiLCBjb2xuYW1lcyhkYXQpLCBmaXhlZCA9IFRSVUUpXSwgDQogICAgICAgICAgICAgICAgICAgICAgIHkgPSBkYXQkLnkpDQogICAgICAgICAgICAgICAgICB9LA0KICAgICAgICAgICAgICAgIGZpcnN0ID0gVFJVRSkNCmN0cmwgPC0gdHJhaW5Db250cm9sKG1ldGhvZCA9ICJyZXBlYXRlZGN2IiwgcmVwZWF0cyA9IDUsDQogICAgICAgICAgICAgICAgICAgICBjbGFzc1Byb2JzID0gVFJVRSwNCiAgICAgICAgICAgICAgICAgICAgIHN1bW1hcnlGdW5jdGlvbiA9IHR3b0NsYXNzU3VtbWFyeSwNCiAgICAgICAgICAgICAgICAgICAgIHNhbXBsaW5nID0gc21vdGVzdCkNCmBgYA0KDQo=