Linear Discriminant Analysis in R

Load Library

library(klaR)
## Loading required package: MASS
library(psych)
library(MASS)
library(ggord)
library(devtools)
## Loading required package: usethis

Getting Data

data("iris")
str(iris)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

Social Network Analysis in R

pairs.panels(iris[1:4],
             gap = 0,
             bg = c("red", "green", "blue")[iris$Species],
             pch = 21)

Data partition

set.seed(123)
ind <- sample(2, nrow(iris),
              replace = TRUE,
              prob = c(0.6, 0.4))
training <- iris[ind==1,]
testing <- iris[ind==2,]

Linear discriminant analysis

linear <- lda(Species~., training)
linear
## Call:
## lda(Species ~ ., data = training)
## 
## Prior probabilities of groups:
##     setosa versicolor  virginica 
##  0.3370787  0.3370787  0.3258427 
## 
## Group means:
##            Sepal.Length Sepal.Width Petal.Length Petal.Width
## setosa         4.946667    3.380000     1.443333    0.250000
## versicolor     5.943333    2.803333     4.240000    1.316667
## virginica      6.527586    2.920690     5.489655    2.048276
## 
## Coefficients of linear discriminants:
##                     LD1         LD2
## Sepal.Length  0.3629008  0.05215114
## Sepal.Width   2.2276982  1.47580354
## Petal.Length -1.7854533 -1.60918547
## Petal.Width  -3.9745504  4.10534268
## 
## Proportion of trace:
##    LD1    LD2 
## 0.9932 0.0068

Decision Tree R Code

attributes(linear)
## $names
##  [1] "prior"   "counts"  "means"   "scaling" "lev"     "svd"     "N"      
##  [8] "call"    "terms"   "xlevels"
## 
## $class
## [1] "lda"

Histogram

p <- predict(linear, training)
ldahist(data = p$x[,1], g = training$Species)

Market Basket Analysis in Data Mining

ldahist(data = p$x[,2], g = training$Species)

Bi-Plot

ggord(linear, training$Species, ylim = c(-10, 10))

Partition plot

partimat(Species~., data = training, method = "lda")

partimat(Species~., data = training, method = "qda")

Confusion matrix and accuracy – training data

p1 <- predict(linear, training)$class
tab <- table(Predicted = p1, Actual = training$Species)
tab
##             Actual
## Predicted    setosa versicolor virginica
##   setosa         30          0         0
##   versicolor      0         30         0
##   virginica       0          0        29

Repeated Measures of ANOVA in R

sum(diag(tab))/sum(tab)
## [1] 1

Confusion matrix and accuracy – testing data

p2 <- predict(linear, testing)$class
tab1 <- table(Predicted = p2, Actual = testing$Species)
tab1
##             Actual
## Predicted    setosa versicolor virginica
##   setosa         20          0         0
##   versicolor      0         19         1
##   virginica       0          1        20
sum(diag(tab1))/sum(tab1)
## [1] 0.9672131