LDA using caret

LDA using the “caret” package:

# Load the built-in iris data set into the workspace.
data("iris")

# Use lower-case column names throughout the analysis.
colnames(iris) <- tolower(colnames(iris))

# Show the renamed columns.
colnames(iris)

## [1] "sepal.length" "sepal.width"  "petal.length" "petal.width" 
## [5] "species"

# Inspect the structure: the type and first few values of every column.
str(iris)

## 'data.frame':    150 obs. of  5 variables:
##  $ sepal.length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ sepal.width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ petal.length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ petal.width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

# Preview the first rows of the data.
head(iris)

##   sepal.length sepal.width petal.length petal.width species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

# Number of rows and columns in the full data set.
dim(iris)

## [1] 150   5

suppressMessages(library(caret))

# Fix the RNG seed so the random split below (and everything derived from
# it) is reproducible from run to run.
set.seed(123)

# Split on species with 70% of the rows going to training.
# With list = FALSE, createDataPartition() returns the selected row indices
# as a matrix rather than a list, so it can be used directly for subsetting.
index <- createDataPartition(y = iris$species, p = 0.7, list = FALSE)

# Rows picked by the partition form the training set; the rest are held out.
train <- iris[index, ]
test <- iris[-index, ]

# Check the size (rows, columns) of the training set ...
dim(train)

## [1] 105   5

# ... and of the held-out test set.
dim(test)

## [1] 45  5

# Seed the RNG so the cross-validation fold assignment is reproducible.
set.seed(123)

# Fit a linear discriminant analysis model with species as the response and
# every other column as a predictor, evaluated by cross-validation.
# The namespace is spelled out because the training data frame is also
# named `train`; `caret::train` makes it unambiguous which one is called.
lda.fit <- caret::train(species ~ ., data = train, method = "lda",
                        trControl = trainControl(method = "cv"))

## Loading required package: MASS

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

# Print the fitted caret model: resampling scheme and cross-validated
# accuracy / kappa.
lda.fit

## Linear Discriminant Analysis 
## 
## 105 samples
##   4 predictor
##   3 classes: 'setosa', 'versicolor', 'virginica' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 95, 95, 95, 94, 93, 93, ... 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.9705556  0.9556818
## 
##

# summary() of the caret fit tabulates the components stored in the fitted
# model object (length, class and mode of each), as shown in the output below.
summary(lda.fit)

##             Length Class      Mode     
## prior        3     -none-     numeric  
## counts       3     -none-     numeric  
## means       12     -none-     numeric  
## scaling      8     -none-     numeric  
## lev          3     -none-     character
## svd          2     -none-     numeric  
## N            1     -none-     numeric  
## call         3     -none-     call     
## xNames       4     -none-     character
## problemType  1     -none-     character
## tuneValue    1     data.frame list     
## obsLevels    3     -none-     character

# Classify the held-out observations with the fitted model.
pred.species <- predict(lda.fit, newdata = test)

# Confusion matrix: predicted species in rows, true species in columns.
table(pred.species, test$species)

##             
## pred.species setosa versicolor virginica
##   setosa         15          0         0
##   versicolor      0         15         1
##   virginica       0          0        14

# Test-set accuracy as a percentage: the share of predictions that match the
# true species, rounded to two decimals.
pred.accuracy <- round(100 * mean(test$species == pred.species), digits = 2)
pred.accuracy

## [1] 97.78

# Plot of the original species from the test set.
# Built with ggplot() rather than qplot(): qplot() is deprecated in current
# ggplot2, and it treats `cex = 2` as a mapped aesthetic (adding a spurious
# size legend) instead of setting the point size. `size = 2` inside
# geom_point() sets a constant size as intended.
ggplot(test, aes(x = petal.width, y = sepal.width, colour = species)) +
  geom_point(size = 2)

# Flag each test observation: TRUE when the prediction matches the true
# species, FALSE when it was misclassified.
test$pred.right <- pred.species == test$species

# Same scatter plot, coloured by whether the prediction was right.
ggplot(test, aes(x = petal.width, y = sepal.width, colour = pred.right)) +
  geom_point(size = 2)

Red points are the test-set observations that the LDA model misclassified.

LDA using caret

Maulik Patel

November 23, 2016

LDA using the “caret” package: