Learning Objectives

In this lesson students will …

  • Review LDA and use it for dimension reduction
  • Apply a Basic Neural Net

Step 1: Load the Iris Data

Iris is one of the most common datasets for statistical examples! It is a rite of passage to use it in a class.

# Bring the built-in iris data set into the workspace and show its structure.
data(list = "iris")
utils::str(iris)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

Step 2: Scale the Data

## Standardise the four numeric measurements (columns 1-4) to mean 0 and
## standard deviation 1; the Species factor in column 5 is left untouched.
## NOTE(review): the centring/scaling constants are computed from all 150
## rows, i.e. before the train/test split performed in Step 3.
measure_cols <- 1:4
iris[measure_cols] <- scale(iris[measure_cols])

Step 3: Split Training and Testing

library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
# Draw a 70% sample of row indices, stratified by Species, for training.
# The seed fixes the random draw so the split can be reproduced.
set.seed(123)
caretSamp <- createDataPartition(y = iris$Species, p = 0.7, list = FALSE)

## Sampled rows form the training set; every remaining row goes to testing
trainCaret <- iris[caretSamp[, 1], ]
testCaret <- iris[-caretSamp[, 1], ]

## Class proportions in the training partition
train_counts <- table(trainCaret$Species)
prop.table(train_counts)
## 
##     setosa versicolor  virginica 
##  0.3333333  0.3333333  0.3333333
## Class proportions in the test partition
test_counts <- table(testCaret$Species)
prop.table(test_counts)
## 
##     setosa versicolor  virginica 
##  0.3333333  0.3333333  0.3333333

Step 4: Fit LDA

library(MASS)
# Fit linear discriminant analysis on the training rows, with Species as the
# class label and every other column as a predictor.
model <- lda(Species ~ ., data = trainCaret)

# Print the fitted model (priors, group means, discriminant coefficients)
print(model)
## Call:
## lda(Species ~ ., data = trainCaret)
## 
## Prior probabilities of groups:
##     setosa versicolor  virginica 
##  0.3333333  0.3333333  0.3333333 
## 
## Group means:
##            Sepal.Length Sepal.Width Petal.Length Petal.Width
## setosa       -1.0287883   0.7075127   -1.2952890  -1.2698202
## versicolor    0.1201882  -0.6428359    0.2859897   0.1695509
## virginica     0.9517298  -0.1708694    1.0385942   1.0953964
## 
## Coefficients of linear discriminants:
##                     LD1        LD2
## Sepal.Length  0.7124281 -0.0209607
## Sepal.Width   0.6051757 -1.0351419
## Petal.Length -4.0125617  1.5851613
## Petal.Width  -2.2207822 -2.0483898
## 
## Proportion of trace:
##   LD1   LD2 
## 0.992 0.008

Step 5: Predict with LDA

## Score the held-out rows with the fitted LDA model
predicted <- predict(object = model, newdata = testCaret)

## List the components returned by the prediction
names(predicted)
## [1] "class"     "posterior" "x"
# Test-set accuracy: share of rows whose predicted class equals the truth
lda_hits <- predicted$class == testCaret$Species
mean(lda_hits)
## [1] 0.9777778

Step 6: Dimension Reduction

# Attach the discriminant scores (LD1, LD2) from the fitted model to the
# training rows so they can be plotted together with Species.
lda_scores <- predict(model)$x
lda_plot <- cbind(trainCaret, lda_scores)

# Scatter plot of the two discriminants, coloured by species
ggplot(lda_plot, aes(x = LD1, y = LD2, color = Species)) +
  geom_point()

Step 7: Basic Neural Net

Binary Example

#install.packages("neuralnet")
library(neuralnet)
# Binary classification: a network that separates setosa from the other
# species using only the two petal measurements. linear.output = FALSE
# applies the activation function to the output node.
nn <- neuralnet(
  Species == "setosa" ~ Petal.Length + Petal.Width,
  data = trainCaret,
  linear.output = FALSE
)

## Network outputs for the test rows
pred <- predict(nn, newdata = testCaret)

## Cross-tabulate true setosa membership against outputs above the 0.5 cutoff
table(testCaret$Species == "setosa", pred[, 1] > 0.5)
##        
##         FALSE TRUE
##   FALSE    30    0
##   TRUE      0   15
## Accuracy: fraction of test rows where the thresholded output matches
## the true setosa / not-setosa label
setosa_truth <- testCaret$Species == "setosa"
setosa_guess <- pred[, 1] > 0.5
mean(setosa_truth == setosa_guess)
## [1] 1

Multiclass

# Multiclass classification: one output node per species indicator on the
# left-hand side; the `.` on the right-hand side pulls in the remaining
# columns of trainCaret as inputs.
nn <- neuralnet(
  (Species == "setosa") + (Species == "versicolor") +
    (Species == "virginica") ~ .,
  data = trainCaret,
  linear.output = FALSE
)

## Network outputs for the test rows, one column per output node
pred <- predict(nn, newdata = testCaret)

## Confusion table: true species against the index of the largest output
irisTab <- table(testCaret$Species, apply(pred, 1, which.max))
irisTab
##             
##               1  2  3
##   setosa     15  0  0
##   versicolor  0 15  0
##   virginica   0  0 15
## ACCURACY
## Compare predicted and true labels row by row instead of summing the
## diagonal of irisTab: if the network never predicts one of the species the
## table loses a column, its diagonal no longer lines up with the rows, and
## sum(diag(irisTab)) silently counts the wrong cells.
## The levels() order (setosa, versicolor, virginica) matches the order of
## the output nodes in the formula used to fit nn.
predicted_species <- levels(testCaret$Species)[apply(pred, 1, which.max)]
mean(predicted_species == testCaret$Species)
## [1] 1
## PLOT
## rep = "best" draws the repetition with the smallest error on the current
## graphics device. With the default rep = NULL, plot.nn opens a new device
## for every repetition, so the figure does not appear in knitted output.
plot(nn, rep = "best")