library(caret)
## Warning: package 'caret' was built under R version 3.2.5
## Loading required package: lattice
## Loading required package: ggplot2
# Load the iris data set and take a quick look at its columns and at how
# many observations each species has.
data("iris")
names(iris)
## [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"
## [5] "Species"
table(iris$Species)
##
## setosa versicolor virginica
## 50 50 50

# Hold out 30% of the rows for testing. `list = FALSE` makes
# createDataPartition() return the selected row indices as a matrix, which
# can be used directly for row subsetting.
inTrain <- createDataPartition(y = iris$Species, p = 0.7, list = FALSE)
training <- iris[inTrain, ]
testing <- iris[-inTrain, ]
dim(training)
## [1] 105 5
dim(testing)
## [1] 45 5
# Scatterplot of two predictors in the training set, coloured by species.
qplot(Petal.Width, Sepal.Width, data = training, color = Species)

# Fit a classification tree (rpart, via caret) predicting Species from all
# other columns of the training set.
modFit <- train(Species ~ ., data = training, method = "rpart")
## Loading required package: rpart
print(modFit$finalModel)
## n= 105
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 105 70 setosa (0.33333333 0.33333333 0.33333333)
## 2) Petal.Length< 2.6 35 0 setosa (1.00000000 0.00000000 0.00000000) *
## 3) Petal.Length>=2.6 70 35 versicolor (0.00000000 0.50000000 0.50000000)
## 6) Petal.Length< 4.85 35 2 versicolor (0.00000000 0.94285714 0.05714286) *
## 7) Petal.Length>=4.85 35 2 virginica (0.00000000 0.05714286 0.94285714) *

# Draw the fitted tree, then label its nodes (including per-node counts).
plot(modFit$finalModel, main = "Classification Tree", uniform = TRUE)
text(modFit$finalModel, all = TRUE, use.n = TRUE, cex = 1)

# Predicted species for the held-out test rows.
predict(modFit, newdata = testing)
## [1] setosa setosa setosa setosa setosa setosa
## [7] setosa setosa setosa setosa setosa setosa
## [13] setosa setosa setosa versicolor virginica versicolor
## [19] versicolor versicolor versicolor versicolor virginica versicolor
## [25] versicolor versicolor versicolor versicolor versicolor versicolor
## [31] virginica virginica virginica virginica virginica virginica
## [37] virginica virginica versicolor virginica virginica virginica
## [43] virginica virginica virginica
## Levels: setosa versicolor virginica
# Bagged loess on the ozone data: fit a loess smoother of temperature on
# ozone to each of 10 bootstrap resamples, then overlay the 10 fitted curves
# on a scatterplot of the raw data.
library(ElemStatLearn)
data(ozone, package = "ElemStatLearn")
ozone <- ozone[order(ozone$ozone), ]
head(ozone)
## ozone radiation temperature wind
## 17 1 8 59 9.7
## 19 4 25 61 9.7
## 14 6 78 57 18.4
## 45 7 48 80 14.3
## 106 7 49 69 10.3
## 7 8 19 61 20.1

# One row per bootstrap fit; one column per ozone value (1..155) at which
# the fitted curve is evaluated.
l1 <- matrix(NA, nrow = 10, ncol = 155)
for (i in 1:10) {
  # Draw row indices with replacement to form the bootstrap resample.
  ss <- sample(seq_len(nrow(ozone)), replace = TRUE)
  ozone0 <- ozone[ss, ]
  # Sort the resample itself (not the original `ozone`): indexing `ozone`
  # by this ordering would discard the bootstrap draw and refit the full
  # data set on every iteration, making all curves identical.
  ozone0 <- ozone0[order(ozone0$ozone), ]
  loess0 <- loess(temperature ~ ozone, data = ozone0, span = 0.2)
  # predict() yields NA for ozone values outside the resample's range.
  l1[i, ] <- predict(loess0, newdata = data.frame(ozone = 1:155))
}

# Raw data as points, each bootstrap fit as a grey curve.
plot(ozone$ozone, ozone$temperature, pch = 19, cex = 0.5)
for (i in 1:10) {
  lines(1:155, l1[i, ], col = "grey", lwd = 2)
}
Load the cell segmentation data from the AppliedPredictiveModeling package using the commands:
# Attach the packages used for the cell segmentation exercise and load the
# segmentationOriginal data set shipped with AppliedPredictiveModeling.
library(AppliedPredictiveModeling)
data("segmentationOriginal", package = "AppliedPredictiveModeling")
library(caret)
If K is small in a K-fold cross validation, is the bias in the estimate of out-of-sample (test set) accuracy smaller or bigger? If K is small, is the variance in the estimate of out-of-sample (test set) accuracy smaller or bigger? Is K large or small in leave-one-out cross validation?
# Load the olive data from pgmm and drop its first column.
library(pgmm)
data(olive)
olive <- olive[, -1]

# Build a one-row data frame holding the mean of every remaining column;
# t() turns the named vector of means into a 1 x p matrix first.
newdata <- as.data.frame(
  t(colMeans(olive))
)
# Load the SAheart data and split it into two halves.
library(ElemStatLearn)
##
## Attaching package: 'ElemStatLearn'
## The following object is masked _by_ '.GlobalEnv':
##
## ozone
data(SAheart)

# Seeded draw, without replacement, of half of the row indices; those rows
# form the training half and the remaining rows form the test half.
set.seed(8484)
train <- sample(
  seq_len(nrow(SAheart)),
  size = nrow(SAheart) / 2,
  replace = FALSE
)
trainSA <- SAheart[train, ]
testSA <- SAheart[-train, ]
# Misclassification rate for 0/1 outcomes.
#   values:     observed outcomes (0 or 1).
#   prediction: predicted scores; a score strictly above 0.5 counts as class 1.
# Returns the number of mismatches divided by length(values).
missClass <- function(values, prediction) {
  predicted_class <- as.numeric(prediction > 0.5)
  n_wrong <- sum(predicted_class != values)
  n_wrong / length(values)
}
# Fit a binomial glm (through caret) for chd on six predictors using the
# training half, then report the misclassification rate on each half.
set.seed(13234)
fitMode <- train(
  chd ~ age + alcohol + obesity + tobacco + typea + ldl,
  data = trainSA,
  method = "glm",
  family = "binomial"
)
## Warning in train.default(x, y, weights = w, ...): You are trying to do
## regression and your outcome only has two possible values Are you trying to
## do classification? If so, use a 2 level factor as your outcome column.

# Predictions for the test half, kept for later inspection.
result4 <- predict(fitMode, newdata = testSA)

# Training-set misclassification rate.
missClass(trainSA$chd, predict(fitMode, newdata = trainSA))
## [1] 0.2727273
# Test-set misclassification rate.
missClass(testSA$chd, predict(fitMode, newdata = testSA))
## [1] 0.3116883
# Load the vowel training and test data sets (presumably shipped with
# ElemStatLearn, which is attached just before -- confirm).
library(ElemStatLearn)
data(vowel.train)
data(vowel.test)
# Fix the RNG seed so that whatever random step follows is reproducible.
set.seed(33833)