Implementation of the LDA and QDA equations found in the book “An Introduction to Statistical Learning”.
library(MASS)
## Warning: package 'MASS' was built under R version 3.3.3
library(ISLR)
## Warning: package 'ISLR' was built under R version 3.3.3
# prior probability of each class k: the proportion of training observations in class k (estimate of pi_k)
pi_lda <- function(y){
  pi_est <- table(y) / length(y)
  return(as.matrix(pi_est))
}
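# e.g. for iris, pi_lda(as.character(iris$Species)) gives 1/3 for each species, since there are 50 observations per class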
# Output the group means
# these are the averages of the predictor within each class, used by LDA as the estimates of mu_k
mu_lda <- function(X, y){
  data_est <- as.data.frame(cbind(X, y))
  # cbind() coerces X to character, so convert it back to numeric
  data_est$X <- as.numeric(as.character(data_est$X))
  mu <- aggregate(data = data_est, X ~ y, FUN = "mean")
  colnames(mu) <- c("y", "X")
  return(mu)
}
# pooled variance estimate, shared across the K classes
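# sigma_hat^2 = (1 / (n - K)) * sum over k of sum over {i: y_i = k} of (x_i - mu_hat_k)^2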
var_lda <- function(X, y, mu){
  n <- length(X)
  K <- length(unique(y))
  k <- unique(y)
  var_est <- 0
  for (i in 1:K){
    var_est <- sum((X[y == k[i]] - mu$X[k[i] == mu$y])^2) + var_est
  }
  var_est <- (1 / (n - K)) * var_est
  return(var_est)
}
# discriminant function for p = 1
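# delta_k(x) = x * mu_k / sigma^2 - mu_k^2 / (2 * sigma^2) + log(pi_k)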
discriminant_lda <- function(X, pi, mu, var){
  K <- nrow(mu)
  disc <- matrix(nrow = length(X), ncol = K)
  colnames(disc) <- mu$y
  for (i in 1:K){
    disc[, i] <- X * (mu$X[i] / var) - ((mu$X[i]^2) / (2 * var)) + log(pi[i])
  }
  disc <- as.data.frame(disc)
  disc$predict <- apply(disc, 1, FUN = "which.max")
  return(disc)
}
X <- iris[ ,1]
y <- as.character(iris[ ,5])
pi_est <- pi_lda(y)
mu_est <- mu_lda(X, y)
var_est <- var_lda(X, y, mu_est)
discriminant_est <- discriminant_lda(X, pi_est, mu_est, var_est)
table(discriminant_est$predict, iris$Species) # confusion matrix
##
## setosa versicolor virginica
## 1 45 6 1
## 2 5 30 12
## 3 0 14 37
# It gives us about 74.7% accuracy (112 of the 150 observations correctly classified)
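# quick check (a sketch, not in the original): overall accuracy of the manual LDA predictions;
# the predicted column indices map to the factor levels of iris$Species in the same order
mean(discriminant_est$predict == as.numeric(iris$Species))  # ~ 0.747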
# Compared with the lda function from the MASS package
lda_fit <- lda(data = iris, Species ~ Sepal.Length)
predict_lda_fit <- predict(lda_fit, iris[, c(1,5)])
table(predict_lda_fit$class, iris$Species)
##
## setosa versicolor virginica
## setosa 45 6 1
## versicolor 5 30 12
## virginica 0 14 37
# same result
# the posterior element of the prediction gives the probability that a given observation belongs to class k (calculated from Bayes' theorem)
# for each observation, these probabilities sum to 1
head(predict_lda_fit$posterior,5)
## setosa versicolor virginica
## 1 0.7766431 0.21124550 0.0121114410
## 2 0.8775438 0.11830923 0.0041469416
## 3 0.9361050 0.06255445 0.0013405050
## 4 0.9543491 0.04489860 0.0007523042
## 5 0.8332780 0.15956874 0.0071533030
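# quick check (a sketch, not in the original): each row of the posterior matrix should sum to 1
head(rowSums(predict_lda_fit$posterior), 5)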
# example of how to calculate it by hand for the first observation
# A, B and C are the class-conditional normal densities f_k(x); pi below is the mathematical constant, not the prior
A <- (1 / sqrt(2 * pi * var_est)) * exp((-1 / (2 * var_est)) * (X[1] - mu_est[1,2])^2)
B <- (1 / sqrt(2 * pi * var_est)) * exp((-1 / (2 * var_est)) * (X[1] - mu_est[2,2])^2)
C <- (1 / sqrt(2 * pi * var_est)) * exp((-1 / (2 * var_est)) * (X[1] - mu_est[3,2])^2)
D <- pi_est[1] * A + pi_est[2] * B + pi_est[3] * C
print((pi_est[1] * A) / D)
## [1] 0.7766431
print((pi_est[2] * B) / D)
## [1] 0.2112455
print((pi_est[3] * C) / D)
## [1] 0.01211144
predict_lda_fit$posterior[1,]
## setosa versicolor virginica
## 0.77664306 0.21124550 0.01211144
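The same Bayes calculation can be wrapped in a small helper that returns the full posterior matrix. This is a minimal sketch, not part of the original code, and posterior_lda is a hypothetical name; it assumes the univariate estimates (pi_est, mu_est, var_est) computed above.
posterior_lda <- function(X, pi, mu, var){
  # class-conditional normal densities f_k(x), one column per class
  dens <- sapply(seq_len(nrow(mu)), function(k) dnorm(X, mean = mu$X[k], sd = sqrt(var)))
  # Bayes' theorem: pi_k * f_k(x) / sum_l pi_l * f_l(x)
  num <- sweep(dens, 2, as.numeric(pi), "*")
  post <- num / rowSums(num)
  colnames(post) <- as.character(mu$y)
  return(post)
}
head(posterior_lda(X, pi_est, mu_est, var_est), 5)  # should match predict_lda_fit$posterior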
# Group means for multivariate X: one row per class, one column per predictor
mu_lda <- function(X, y){
  mu <- aggregate(X ~ y, FUN = "mean")
  row.names(mu) <- mu[ ,1]
  mu <- mu[ ,-1]
  mu <- as.matrix(mu)
  return(mu)
}
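# discriminant function for p > 1: delta_k(x) = x' Sigma^{-1} mu_k - (1/2) * mu_k' Sigma^{-1} mu_k + log(pi_k)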
multi_discriminant_lda <- function(X, y, pi, mu, var){
  # note: 'var' is expected to be the inverse of the common covariance matrix
  K <- length(unique(y))
  disc <- matrix(nrow = nrow(X), ncol = K)
  # the columns follow the row order of mu, so label them accordingly
  colnames(disc) <- row.names(mu)
  for (i in 1:K){
    for (j in 1:nrow(X)){
      disc[j, i] <- X[j, ] %*% var %*% mu[i, ] - (1/2) * t(mu[i, ]) %*% var %*% mu[i, ] + log(pi[i])
    }
  }
  disc <- as.data.frame(disc)
  disc$predict <- apply(disc, 1, FUN = "which.max")
  return(disc)
}
# # other possibility (same result but vector implementation)
# A <- X %*% solve(cov(X)) %*% mu_est[1,]
# B <- - ((1/2) * t(mu_est[1,]) %*% solve(cov(X)) %*% mu_est[1,]) + log(pi_est[1])
# A + as.numeric(B)
X <- as.matrix(iris[, 1:4])
y <- as.character(iris[ ,5])
# let's try with scaled X
X <- scale(X)
pi_est <- pi_lda(y)
mu_est <- mu_lda(X, y)
var_est <- solve(cov(X))
discriminant_est <- multi_discriminant_lda(X, y, pi_est, mu_est, var_est)
table(discriminant_est$predict, iris$Species)
##
## setosa versicolor virginica
## 1 49 0 0
## 2 1 42 11
## 3 0 8 39
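Note that var_est above is solve(cov(X)), i.e. the inverse of the overall covariance of the predictors, whereas the ISLR LDA formula uses the pooled within-class covariance. A minimal sketch of that estimate, with pooled_cov as a hypothetical helper name not in the original code:
pooled_cov <- function(X, y){
  k <- unique(y)
  # center each class at its own mean, then pool the cross-products
  centered <- lapply(k, function(cl) scale(X[y == cl, , drop = FALSE], scale = FALSE))
  S <- Reduce("+", lapply(centered, function(Z) t(Z) %*% Z))
  S / (nrow(X) - length(k))
}
# var_est <- solve(pooled_cov(X, y))  # drop-in alternative to solve(cov(X))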
We will now allow the observations in each of the K classes to have their own covariance matrix (QDA). The mu and pi functions stay the same as the LDA ones.
var_qda <- function(X, y){
  # inverse covariance matrix of each class k (QDA allows a class-specific covariance)
  K <- length(unique(y))
  k <- unique(y)
  var2 <- vector("list", K)
  for (i in 1:K){
    var2[[i]] <- solve(cov(X[y == k[i], ]))
  }
  var2 <- structure(var2, names = k)
  return(var2)
}
qda_fct <- function(X, y, pi, mu, var){
  m <- length(y)
  K <- length(unique(y))
  k <- unique(y)
  qda_result <- matrix(0, nrow = m, ncol = K + 1)
  colnames(qda_result) <- c(k, "Result")
  # note: the -(1/2) * log|Sigma_k| term of the full ISLR quadratic discriminant is omitted here
  for (j in 1:K){
    for (i in 1:m){
      qda_result[i, j] <- -(1/2) * X[i, ] %*% var[[j]] %*% X[i, ] + X[i, ] %*% var[[j]] %*% mu[j, ] - (1/2) * mu[j, ] %*% var[[j]] %*% mu[j, ] + log(pi[j])
    }
  }
  qda_result <- as.data.frame(qda_result)
  qda_result$Result <- apply(qda_result[, 1:K], 1, FUN = "which.max")
  return(qda_result)
}
pi_est <- pi_lda(y)
mu_est <- mu_lda(X,y)
var_est <- var_qda(X,y)
qda_training <- qda_fct(X, y, pi_est, mu_est, var_est)
table(qda_training$Result, iris$Species) #confusion matrix
##
## setosa versicolor virginica
## 1 50 0 0
## 2 0 47 0
## 3 0 3 50
# compare with the qda function from the MASS package
qda_fit <- qda(data = iris, Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width)
table(predict(qda_fit, iris[, 1:4])$class, iris$Species)
##
## setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 48 1
## virginica 0 2 49
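The small difference from MASS in the versicolor/virginica counts may be due, at least in part, to the omitted -(1/2) * log|Sigma_k| term in qda_fct. A minimal sketch of the full ISLR quadratic discriminant, with qda_full as a hypothetical name not in the original code:
qda_full <- function(X, y, pi, mu, var){
  m <- nrow(X)
  k <- unique(y)
  K <- length(k)
  res <- matrix(0, nrow = m, ncol = K, dimnames = list(NULL, k))
  for (j in 1:K){
    # var[[j]] is Sigma_k^{-1}, so -(1/2) * log|Sigma_k| equals +(1/2) * log|Sigma_k^{-1}|
    logdet <- 0.5 * as.numeric(determinant(var[[j]], logarithm = TRUE)$modulus)
    for (i in 1:m){
      res[i, j] <- -(1/2) * X[i, ] %*% var[[j]] %*% X[i, ] +
        X[i, ] %*% var[[j]] %*% mu[j, ] -
        (1/2) * mu[j, ] %*% var[[j]] %*% mu[j, ] +
        logdet + log(pi[j])
    }
  }
  res <- as.data.frame(res)
  res$Result <- apply(res[, 1:K], 1, FUN = "which.max")
  return(res)
}
# table(qda_full(X, y, pi_est, mu_est, var_est)$Result, iris$Species)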