Partial Least Squares (PLS) Illustration

#install.packages("caret", dependencies = c("Depends", "Suggests"))
library(caret)

## Warning: package 'caret' was built under R version 3.1.3

## Loading required package: lattice

## Warning: package 'lattice' was built under R version 3.1.2

## Loading required package: ggplot2

library(mlbench)
# Load the Sonar data set (shipped with mlbench); Class is the two-level
# outcome used below.
data(Sonar)

# Fix the RNG seed so the train/test split and the resampling performed by
# train() are the same on every run. Any printed results elsewhere in this
# file that were produced without a seed may differ slightly in their numbers.
set.seed(107)

# Hold out 25% of the rows for testing. list = FALSE makes
# createDataPartition() return row indices that can subset Sonar directly.
inTrain <- createDataPartition(y = Sonar$Class, p = 0.75, list = FALSE)
training <- Sonar[inTrain, ]
testing <- Sonar[-inTrain, ]

# Baseline PLS classifier using caret's default tuning grid and resampling.
# Predictors are centered and scaled as part of the model fit; the argument
# name is spelled out in full rather than relying on partial matching.
plsFit <- train(
  Class ~ .,
  data = training,
  method = "pls",
  preProcess = c("center", "scale")
)

## Loading required package: pls
## 
## Attaching package: 'pls'
## 
## The following object is masked from 'package:caret':
## 
##     R2
## 
## The following object is masked from 'package:stats':
## 
##     loadings

# Seed the RNG so the cross-validation folds (and therefore the selected
# ncomp) are reproducible from run to run.
set.seed(107)

# Repeated cross-validation, 3 repeats. classProbs = TRUE is required so that
# twoClassSummary can compute ROC, sensitivity and specificity per resample.
cvControl <- trainControl(
  method = "repeatedcv",
  repeats = 3,
  classProbs = TRUE,
  summaryFunction = twoClassSummary
)

# Tune PLS over 15 candidate values of ncomp and pick the one with the
# largest resampled ROC. Predictors are centered and scaled during fitting.
plsFit1 <- train(
  Class ~ .,
  data = training,
  method = "pls",
  tuneLength = 15,
  metric = "ROC",
  trControl = cvControl,
  preProcess = c("center", "scale")
)

# Print the resampling summary and the chosen ncomp.
plsFit1

## Partial Least Squares 
## 
## 157 samples
##  60 predictor
##   2 classes: 'M', 'R' 
## 
## Pre-processing: centered, scaled 
## Resampling: Cross-Validated (10 fold, repeated 3 times) 
## 
## Summary of sample sizes: 142, 141, 142, 142, 141, 142, ... 
## 
## Resampling results across tuning parameters:
## 
##   ncomp  ROC        Sens       Spec       ROC SD     Sens SD    Spec SD  
##    1     0.7760334  0.6564815  0.7053571  0.1346800  0.1370536  0.1847901
##    2     0.8129464  0.7236111  0.7505952  0.1516660  0.1918044  0.1674809
##    3     0.8365575  0.7768519  0.7678571  0.1241883  0.1543559  0.1659320
##    4     0.8264054  0.7777778  0.7434524  0.1342993  0.1581055  0.1653002
##    5     0.7925926  0.7245370  0.7345238  0.1320289  0.1680610  0.1754489
##    6     0.7736524  0.7314815  0.7351190  0.1333448  0.1700383  0.1667308
##    7     0.7708085  0.7208333  0.7333333  0.1282312  0.1881278  0.1627156
##    8     0.7747933  0.7203704  0.7351190  0.1250950  0.2046362  0.1634673
##    9     0.7746280  0.7087963  0.7357143  0.1198253  0.1991589  0.1585722
##   10     0.7730737  0.6847222  0.7327381  0.1205608  0.1984116  0.1508307
##   11     0.7763476  0.7162037  0.7339286  0.1174233  0.1957973  0.1601557
##   12     0.7711392  0.7208333  0.7160714  0.1206202  0.1818349  0.1680621
##   13     0.7680638  0.7138889  0.7113095  0.1280167  0.1957265  0.1701256
##   14     0.7730737  0.7134259  0.7113095  0.1218564  0.1931223  0.1659375
##   15     0.7608879  0.7138889  0.6988095  0.1419384  0.1704784  0.1785550
## 
## ROC was used to select the optimal model using  the largest value.
## The final value used for the model was ncomp = 3.

# Plot the resampling profile of the tuned model.
plot(plsFit1)

# Predict class labels for the held-out rows ("raw" is the default
# prediction type) and inspect the structure of the result.
plsClasses <- predict(object = plsFit1, newdata = testing, type = "raw")
str(plsClasses)

##  Factor w/ 2 levels "M","R": 2 2 2 2 2 1 2 2 2 2 ...

# Cross-tabulate the predicted classes against the observed test-set classes
# and print the associated summary statistics.
confusionMatrix(data = plsClasses, reference = testing$Class)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  M  R
##          M 22  4
##          R  5 20
##                                          
##                Accuracy : 0.8235         
##                  95% CI : (0.6913, 0.916)
##     No Information Rate : 0.5294         
##     P-Value [Acc > NIR] : 1.117e-05      
##                                          
##                   Kappa : 0.6467         
##  Mcnemar's Test P-Value : 1              
##                                          
##             Sensitivity : 0.8148         
##             Specificity : 0.8333         
##          Pos Pred Value : 0.8462         
##          Neg Pred Value : 0.8000         
##              Prevalence : 0.5294         
##          Detection Rate : 0.4314         
##    Detection Prevalence : 0.5098         
##       Balanced Accuracy : 0.8241         
##                                          
##        'Positive' Class : M              
##

# Predicted class probabilities for the held-out rows (one column per class),
# followed by a look at the first six rows.
plsProb <- predict(object = plsFit1, newdata = testing, type = "prob")
head(plsProb, n = 6L)

##            M         R
## 4  0.4447738 0.5552262
## 10 0.4724479 0.5275521
## 14 0.3452033 0.6547967
## 17 0.4759336 0.5240664
## 19 0.2985823 0.7014177
## 34 0.5445711 0.4554289

Partial Least Squares (PLS) Illustration

SA