#install.packages("caret", dependencies = c("Depends", "Suggests"))
library(caret)
## Warning: package 'caret' was built under R version 3.1.3
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.1.2
## Loading required package: ggplot2
library(mlbench)
# Load the Sonar data set (provided by mlbench); `Class` is the outcome column.
data(Sonar)

# Fix the RNG seed so the random train/test split is the same on every run.
set.seed(107)

# Put 75% of the rows (sampled on the outcome) into the training set; the
# remaining rows form the held-out test set.
inTrain <- createDataPartition(y = Sonar$Class, p = 0.75, list = FALSE)
training <- Sonar[inTrain, ]
testing <- Sonar[-inTrain, ]
# Baseline partial least squares classifier fitted with train()'s default
# resampling and tuning settings. Predictors are centered and scaled first.
# `preProcess` is written out in full instead of relying on partial matching
# of the abbreviated `preProc`.
plsFit <- train(
  Class ~ .,
  data = training,
  method = "pls",
  preProcess = c("center", "scale")
)
## Loading required package: pls
##
## Attaching package: 'pls'
##
## The following object is masked from 'package:caret':
##
## R2
##
## The following object is masked from 'package:stats':
##
## loadings
# Tuned PLS classifier.
# - tuneLength = 15: evaluate 15 candidate values of the tuning parameter.
# - repeatedcv with repeats = 3: repeated cross-validation for resampling.
# - classProbs = TRUE with twoClassSummary: compute class probabilities so the
#   ROC / Sens / Spec summaries are available, and select the model on "ROC".
# The seed is fixed first so the resampling folds (and therefore the selected
# model) are reproducible between runs.
set.seed(123)
plsFit1 <- train(
  Class ~ .,
  data = training,
  method = "pls",
  tuneLength = 15,
  metric = "ROC",
  trControl = trainControl(
    method = "repeatedcv",
    repeats = 3,
    classProbs = TRUE,
    summaryFunction = twoClassSummary
  ),
  preProcess = c("center", "scale")
)
# Show the resampling results. Printing explicitly keeps the summary visible
# when the script is run with source(), where bare top-level values are not
# auto-printed.
print(plsFit1)
## Partial Least Squares
##
## 157 samples
## 60 predictor
## 2 classes: 'M', 'R'
##
## Pre-processing: centered, scaled
## Resampling: Cross-Validated (10 fold, repeated 3 times)
##
## Summary of sample sizes: 142, 141, 142, 142, 141, 142, ...
##
## Resampling results across tuning parameters:
##
## ncomp ROC Sens Spec ROC SD Sens SD Spec SD
## 1 0.7760334 0.6564815 0.7053571 0.1346800 0.1370536 0.1847901
## 2 0.8129464 0.7236111 0.7505952 0.1516660 0.1918044 0.1674809
## 3 0.8365575 0.7768519 0.7678571 0.1241883 0.1543559 0.1659320
## 4 0.8264054 0.7777778 0.7434524 0.1342993 0.1581055 0.1653002
## 5 0.7925926 0.7245370 0.7345238 0.1320289 0.1680610 0.1754489
## 6 0.7736524 0.7314815 0.7351190 0.1333448 0.1700383 0.1667308
## 7 0.7708085 0.7208333 0.7333333 0.1282312 0.1881278 0.1627156
## 8 0.7747933 0.7203704 0.7351190 0.1250950 0.2046362 0.1634673
## 9 0.7746280 0.7087963 0.7357143 0.1198253 0.1991589 0.1585722
## 10 0.7730737 0.6847222 0.7327381 0.1205608 0.1984116 0.1508307
## 11 0.7763476 0.7162037 0.7339286 0.1174233 0.1957973 0.1601557
## 12 0.7711392 0.7208333 0.7160714 0.1206202 0.1818349 0.1680621
## 13 0.7680638 0.7138889 0.7113095 0.1280167 0.1957265 0.1701256
## 14 0.7730737 0.7134259 0.7113095 0.1218564 0.1931223 0.1659375
## 15 0.7608879 0.7138889 0.6988095 0.1419384 0.1704784 0.1785550
##
## ROC was used to select the optimal model using the largest value.
## The final value used for the model was ncomp = 3.
# Plot the resampling profile of the tuned model. plot() on a train object
# returns a lattice graphic, which is only drawn when printed; the explicit
# print() makes the figure appear under source() as well as interactively.
print(plot(plsFit1))

# Predict class labels for the held-out rows with the tuned model, then
# inspect the structure of the resulting predictions.
plsClasses <- predict(
  plsFit1,
  newdata = testing
)
str(plsClasses)
## Factor w/ 2 levels "M","R": 2 2 2 2 2 1 2 2 2 2 ...
# Compare predicted classes against the true test labels. Both arguments are
# named so predictions and reference cannot be swapped by accident, and the
# result is printed explicitly so it is shown when the script is source()d.
print(confusionMatrix(data = plsClasses, reference = testing$Class))
## Confusion Matrix and Statistics
##
## Reference
## Prediction M R
## M 22 4
## R 5 20
##
## Accuracy : 0.8235
## 95% CI : (0.6913, 0.916)
## No Information Rate : 0.5294
## P-Value [Acc > NIR] : 1.117e-05
##
## Kappa : 0.6467
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.8148
## Specificity : 0.8333
## Pos Pred Value : 0.8462
## Neg Pred Value : 0.8000
## Prevalence : 0.5294
## Detection Rate : 0.4314
## Detection Prevalence : 0.5098
## Balanced Accuracy : 0.8241
##
## 'Positive' Class : M
##
# Predict class probabilities (one column per class) for the held-out rows and
# show the first few. head() is wrapped in print() so the preview is displayed
# when the script is run with source().
plsProb <- predict(plsFit1, newdata = testing, type = "prob")
print(head(plsProb))
## M R
## 4 0.4447738 0.5552262
## 10 0.4724479 0.5275521
## 14 0.3452033 0.6547967
## 17 0.4759336 0.5240664
## 19 0.2985823 0.7014177
## 34 0.5445711 0.4554289