In this lesson students will …
Iris is one of the most common datasets for statistical examples! It is a rite of passage to use it in a class.
# Load the built-in iris data set and look at its structure before changing it.
data(iris)
str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
## SCALE
# Standardize the four numeric measurement columns (columns 1-4) in place;
# the Species factor in column 5 is left untouched.
# NOTE(review): this runs on the full data set, before the train/test split
# further down, so the test rows contribute to the centering and scaling.
iris[seq_len(4)] <- scale(iris[seq_len(4)], center = TRUE, scale = TRUE)
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
# Split the data into a training set and a test set.
# Fix the RNG seed first so the random partition can be reproduced.
set.seed(123)
# Row indices for the training set: 70% of the rows, drawn on the Species
# column. list = FALSE returns the indices as a matrix rather than a list.
caretSamp <- createDataPartition(
  y = iris$Species,
  p = 0.7,
  list = FALSE
)
## Partition
# The sampled rows form the training set; every remaining row is a test row.
trainCaret <- iris[caretSamp, ]
testCaret <- iris[-caretSamp, ]
## check tables
# Class proportions in each set; the printed values show all three species
# equally represented in both.
prop.table(table(trainCaret[["Species"]]))
##
## setosa versicolor virginica
## 0.3333333 0.3333333 0.3333333
prop.table(table(testCaret[["Species"]]))
##
## setosa versicolor virginica
## 0.3333333 0.3333333 0.3333333
library(MASS)
# Fit a linear discriminant analysis model: Species explained by every other
# column of the training set.
model <- lda(Species ~ ., data = trainCaret)
# Print the fitted model (priors, group means, discriminant coefficients).
model
## Call:
## lda(Species ~ ., data = trainCaret)
##
## Prior probabilities of groups:
## setosa versicolor virginica
## 0.3333333 0.3333333 0.3333333
##
## Group means:
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## setosa -1.0287883 0.7075127 -1.2952890 -1.2698202
## versicolor 0.1201882 -0.6428359 0.2859897 0.1695509
## virginica 0.9517298 -0.1708694 1.0385942 1.0953964
##
## Coefficients of linear discriminants:
## LD1 LD2
## Sepal.Length 0.7124281 -0.0209607
## Sepal.Width 0.6051757 -1.0351419
## Petal.Length -4.0125617 1.5851613
## Petal.Width -2.2207822 -2.0483898
##
## Proportion of trace:
## LD1 LD2
## 0.992 0.008
## PREDICT
# Score the held-out rows; the result is a list whose components are
# listed by names() below.
predicted <- predict(model, newdata = testCaret)
names(predicted)
## [1] "class" "posterior" "x"
# Accuracy: share of test rows whose predicted class equals the true Species.
mean(predicted[["class"]] == testCaret[["Species"]])
## [1] 0.9777778
# Data for the plot: the training rows with their discriminant scores
# (columns LD1 and LD2) attached.
lda_plot <- cbind(trainCaret, predict(model)$x)
# Scatter plot of the two discriminants, one color per species.
ggplot(lda_plot, aes(x = LD1, y = LD2)) +
  geom_point(aes(color = Species))
# install.packages("neuralnet")
library(neuralnet)
# Binary classification: is the flower a setosa, judged from the two petal
# measurements only? linear.output = FALSE applies the activation function
# to the output unit as well.
nn <- neuralnet(
  Species == "setosa" ~ Petal.Length + Petal.Width,
  data = trainCaret,
  linear.output = FALSE
)
## Prediction
# One row of network output per test observation.
pred <- predict(nn, newdata = testCaret)
## Table
# Rows: true "is setosa"; columns: network output above the 0.5 cut-off.
table(testCaret[["Species"]] == "setosa", pred[, 1] > 0.5)
##
## FALSE TRUE
## FALSE 30 0
## TRUE 0 15
## Accuracy
# Share of test rows where the thresholded output agrees with the truth.
mean((testCaret[["Species"]] == "setosa") == (pred[, 1] > 0.5))
## [1] 1
# Multiclass classification
# One logical response per species, so the network has three output units,
# in the order setosa, versicolor, virginica. The `.` on the right-hand side
# is expanded against trainCaret by neuralnet.
nn <- neuralnet(
  (Species == "setosa") + (Species == "versicolor") + (Species == "virginica") ~ .,
  data = trainCaret,
  linear.output = FALSE
)
## PREDICT
# One row per test observation, one column per output unit.
pred <- predict(nn, testCaret)
## TABLE
# Turn each row's strongest output unit into a species label. The labels
# follow the order of the responses in the formula above. Building the factor
# with every species level keeps the confusion table square even when a class
# is never predicted, so diag() always lines up actual with predicted.
speciesOrder <- c("setosa", "versicolor", "virginica")
predClass <- factor(
  speciesOrder[apply(pred, 1, which.max)],
  levels = levels(testCaret$Species)
)
irisTab <- table(testCaret$Species, predClass, dnn = c("actual", "predicted"))
irisTab
## predicted
## actual setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 15 0
## virginica 0 0 15
## ACCURACY
# Correct predictions (the diagonal) divided by all test observations.
sum(diag(irisTab)) / sum(irisTab)
## [1] 1
## PLOT
# Draw the fitted network.
plot(nn)