Sec.12 - TRAINING OPTIONS

Based on Jeff Leek's slides for the “Practical Machine Learning” course.

SPAM Example

library(caret) 
library(kernlab) 
library("e1071")
# Load the spam data set shipped with kernlab.
data(spam)

# Fix the RNG state first: createDataPartition() samples rows at random, so
# without a seed every run would produce a different training/testing split.
set.seed(32343)

# Keep 75% of the rows for training (sampling is stratified on the outcome,
# spam$type); list = FALSE returns the selected row indices as a matrix.
inTrain <- createDataPartition(y = spam$type, p = 0.75, list = FALSE)
training <- spam[inTrain, ]
testing <- spam[-inTrain, ]

# Fit a GLM of type on all other columns with train()'s default settings.
modelFit <- train(type ~ ., data = training, method = "glm")

Train options

# Show the arguments accepted by the default method of train().
# train.default is an S3 method that recent caret releases register without
# exporting, so referring to it by bare name can fail with "object not found";
# getS3method() looks it up in the S3 registry and works either way.
# NOTE(review): the signature printed below reflects the caret version that
# produced these notes and may differ from the installed one.
args(getS3method("train", "default"))
## function (x, y, method = "rf", preProcess = NULL, ..., weights = NULL, 
##     metric = ifelse(is.factor(y), "Accuracy", "RMSE"), maximize = ifelse(metric == 
##         "RMSE", FALSE, TRUE), trControl = trainControl(), tuneGrid = NULL, 
##     tuneLength = 3) 
## NULL

Metric options

Continuous outcomes: RMSE (root mean squared error) and Rsquared (R² from the regression fit).

Categorical outcomes: Accuracy (fraction classified correctly) and Kappa (a measure of concordance).

trainControl()

# Display the formal arguments of trainControl() together with their default
# values; the function is referenced through the caret namespace explicitly.
args(caret::trainControl)
## function (method = "boot", number = ifelse(grepl("cv", method), 
##     10, 25), repeats = ifelse(grepl("cv", method), 1, number), 
##     p = 0.75, initialWindow = NULL, horizon = 1, fixedWindow = TRUE, 
##     verboseIter = FALSE, returnData = TRUE, returnResamp = "final", 
##     savePredictions = FALSE, classProbs = FALSE, summaryFunction = defaultSummary, 
##     selectionFunction = "best", preProcOptions = list(thresh = 0.95, 
##         ICAcomp = 3, k = 5), index = NULL, indexOut = NULL, timingSamps = 0, 
##     predictionBounds = rep(FALSE, 2), seeds = NA, adaptive = list(min = 5, 
##         alpha = 0.05, method = "gls", complete = TRUE), allowParallel = TRUE) 
## NULL

trainControl() resampling

Setting the seed

seed example #1

# Seed the random number generator so the bootstrap resamples drawn inside
# train() are the same on every run.
set.seed(1235)
modelFit2 <- train(
  type ~ .,
  data = training,
  method = "glm"
)
# Show the resampling summary of the fitted model.
print(modelFit2)
## Generalized Linear Model 
## 
## 3451 samples
##   57 predictors
##    2 classes: 'nonspam', 'spam' 
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## 
## Summary of sample sizes: 3451, 3451, 3451, 3451, 3451, 3451, ... 
## 
## Resampling results
## 
##   Accuracy  Kappa  Accuracy SD  Kappa SD
##   0.9       0.8    0.005        0.01    
## 
## 

seed example #2

# Reset the generator to the same seed used in seed example #1, then refit the
# identical model; the point is that the resampling results are reproduced.
set.seed(1235)
modelFit3 <- train(
  form = type ~ .,
  data = training,
  method = "glm"
)
# Show the resampling summary of the refitted model.
print(modelFit3)
## Generalized Linear Model 
## 
## 3451 samples
##   57 predictors
##    2 classes: 'nonspam', 'spam' 
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## 
## Summary of sample sizes: 3451, 3451, 3451, 3451, 3451, 3451, ... 
## 
## Resampling results
## 
##   Accuracy  Kappa  Accuracy SD  Kappa SD
##   0.9       0.8    0.005        0.01    
## 
## 

Further resources