Based on Jeff Leek's slides for the “Practical Machine Learning” course.
library(caret)
library(kernlab)
library("e1071")
# Make the `spam` data set available in the workspace.
data(spam)

# Ask createDataPartition() for the row indices of a 75% training sample,
# partitioned on the outcome column `type`; the remaining rows form the
# held-out test set.
inTrain <- createDataPartition(
  y = spam$type,
  p = 0.75,
  list = FALSE
)
training <- spam[inTrain, ]
testing <- spam[-inTrain, ]

# Fit a GLM of `type` on every other column of the training set.
modelFit <- train(type ~ ., data = training, method = "glm")
# Show the argument list of train.default; the "##" lines that follow are the
# recorded console output of this call.
args(train.default)
## function (x, y, method = "rf", preProcess = NULL, ..., weights = NULL,
## metric = ifelse(is.factor(y), "Accuracy", "RMSE"), maximize = ifelse(metric ==
## "RMSE", FALSE, TRUE), trControl = trainControl(), tuneGrid = NULL,
## tuneLength = 3)
## NULL
trainControl()
Continuous outcomes:
Categorical outcomes:
trainControl()
args(trainControl)
## function (method = "boot", number = ifelse(grepl("cv", method),
## 10, 25), repeats = ifelse(grepl("cv", method), 1, number),
## p = 0.75, initialWindow = NULL, horizon = 1, fixedWindow = TRUE,
## verboseIter = FALSE, returnData = TRUE, returnResamp = "final",
## savePredictions = FALSE, classProbs = FALSE, summaryFunction = defaultSummary,
## selectionFunction = "best", preProcOptions = list(thresh = 0.95,
## ICAcomp = 3, k = 5), index = NULL, indexOut = NULL, timingSamps = 0,
## predictionBounds = rep(FALSE, 2), seeds = NA, adaptive = list(min = 5,
## alpha = 0.05, method = "gls", complete = TRUE), allowParallel = TRUE)
## NULL
trainControl() resampling
set.seed(1235)
# Refit the GLM of `type` on all remaining columns of `training`.
modelFit2 <- train(
  type ~ .,
  data = training,
  method = "glm"
)
# Auto-print the fitted object; its recorded output is in the "##" lines below.
modelFit2
## Generalized Linear Model
##
## 3451 samples
## 57 predictors
## 2 classes: 'nonspam', 'spam'
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
##
## Summary of sample sizes: 3451, 3451, 3451, 3451, 3451, 3451, ...
##
## Resampling results
##
## Accuracy Kappa Accuracy SD Kappa SD
## 0.9 0.8 0.005 0.01
##
##
# Reset the random number generator to seed 1235 and fit the same model again.
# The recorded resampling results below are identical to those printed for
# modelFit2.
set.seed(seed = 1235)
modelFit3 <- train(
  type ~ .,
  data = training,
  method = "glm"
)
# Auto-print the fitted object; its recorded output is in the "##" lines below.
modelFit3
## Generalized Linear Model
##
## 3451 samples
## 57 predictors
## 2 classes: 'nonspam', 'spam'
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
##
## Summary of sample sizes: 3451, 3451, 3451, 3451, 3451, 3451, ...
##
## Resampling results
##
## Accuracy Kappa Accuracy SD Kappa SD
## 0.9 0.8 0.005 0.01
##
##