Data splitting examples: this is what we used in the last example

1. createDataPartition

library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(kernlab)
# Load the spam data set (provided by kernlab).
data(spam)
# Seed the RNG so the random partition below is reproducible from run to run.
set.seed(32323)
# Draw the training-row indices from the outcome column; p = 0.75 places
# about 75% of the rows in the training set. list = FALSE makes the result
# usable directly as a row index.
inTrain <- createDataPartition(y = spam$type, p = 0.75, list = FALSE)
training <- spam[inTrain, ]
# Negative indexing keeps every row that was not selected for training.
testing <- spam[-inTrain, ]
dim(training)
## [1] 3451   58

2. K-fold (return training set) – Cross validation

# Fix the seed so the fold assignment can be reproduced.
set.seed(32323)
# Split the rows into 10 folds; with returnTrain = TRUE each list element
# holds the row indices of the training portion for that fold.
folds <- createFolds(
  y = spam$type,
  k = 10,
  list = TRUE,
  returnTrain = TRUE
)
# Number of training rows in each fold.
lengths(folds)
## Fold01 Fold02 Fold03 Fold04 Fold05 Fold06 Fold07 Fold08 Fold09 Fold10 
##   4141   4140   4141   4142   4140   4142   4141   4141   4140   4141
# First ten training indices of the first fold.
folds[[1]][seq_len(10)]
##  [1]  1  2  3  4  5  6  7  8  9 10

3. K-fold (return test set) – Cross validation

# Fix the seed so the fold assignment can be reproduced.
set.seed(32323)
# Split the rows into 10 folds; with returnTrain = FALSE each list element
# holds the row indices of the held-out (test) portion for that fold.
folds <- createFolds(
  y = spam$type,
  k = 10,
  list = TRUE,
  returnTrain = FALSE
)
# Number of test rows in each fold.
lengths(folds)
## Fold01 Fold02 Fold03 Fold04 Fold05 Fold06 Fold07 Fold08 Fold09 Fold10 
##    460    461    460    459    461    459    460    460    461    460
# First ten test indices of the first fold.
folds[[1]][seq_len(10)]
##  [1] 24 27 32 40 41 43 55 58 63 68

4. Resampling

# Fix the seed so the resamples can be reproduced.
set.seed(32323)
# `times` is the number of resamples to draw from the data.
folds <- createResample(
  y = spam$type,
  times = 10,
  list = TRUE
)
# Size of each resample.
lengths(folds)
## Resample01 Resample02 Resample03 Resample04 Resample05 Resample06 
##       4601       4601       4601       4601       4601       4601 
## Resample07 Resample08 Resample09 Resample10 
##       4601       4601       4601       4601
# First ten indices of the first resample.
folds[[1]][seq_len(10)]
##  [1]  1  2  3  3  3  5  5  7  8 12
# Because the rows are resampled, the same index can appear more than once
# (for example 3 and 5 above).

5. Time slices

set.seed(32323)
# A stand-in time index of 1000 consecutive observations.
time <- seq_len(1000)
# initialWindow = 20 and horizon = 10: the first slice trains on
# observations 1-20 and tests on the 10 observations that follow (21-30),
# as the output below shows.
folds <- createTimeSlices(
  y = time,
  initialWindow = 20,
  horizon = 10
)
# The result has two components: the training slices and the test slices.
names(folds)
## [1] "train" "test"
# Indices of the first training window.
folds[["train"]][[1]]
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20
# Indices of the matching test window.
folds[["test"]][[1]]
##  [1] 21 22 23 24 25 26 27 28 29 30