Linear Discriminant Analysis

Introduction to LDA

##input data
dYes=data.frame(Durability=c(8,6,10,9,4),Performance=c(9,7,6,4,8),Decision=rep("y",5))
dNo=data.frame(Durability=c(5,3,4,2,2),Performance=c(4,7,5,4,2),Decision=rep("n",5))
survey=merge(dYes,dNo,all=TRUE)

##run the lda algorithm
library(MASS)
lda.result=lda(Decision ~.,survey); lda.result

## Call:
## lda(Decision ~ ., data = survey)
## 
## Prior probabilities of groups:
##   y   n 
## 0.5 0.5 
## 
## Group means:
##   Durability Performance
## y        7.4         6.8
## n        3.2         4.4
## 
## Coefficients of linear discriminants:
##                 LD1
## Durability  -0.4755
## Performance -0.3588

plot(lda.result)

plot of chunk unnamed-chunk-1

x=seq(0,12,by=0.2)
y=seq(0,12,by=0.2)

##retreive the values of beta coefficients
beta1=lda.result$scaling[1]
beta2=lda.result$scaling[2]

##display the values of lda coefficients
beta1; beta2

## [1] -0.4755

## [1] -0.3588

########################

Let us manually implement the LDA algorithm.

##Manually implementing the LDA algorithm
##compute mean of each predictor for each class
meanDurYes=with(survey, mean(Durability[Decision=="y"]))
meanPerYes=with(survey, mean(Performance[Decision=="y"]))

meanDurNo=with(survey, mean(Durability[Decision=="n"]))
meanPerNo=with(survey, mean(Performance[Decision=="n"]))

##create a vector of the difference/average of averaged means across the groups
meanDiff=c(meanDurYes-meanDurNo, meanPerYes-meanPerNo)
meanAvg=c((meanDurYes+meanDurNo)/2, (meanPerYes+meanPerNo)/2)

##compute covariance matrix of all predictors for each class
covarYes=with(survey, cov(survey[Decision=="y",1:2]))
covarNo=with(survey, cov(survey[Decision=="n",1:2]))

##compute pooled covariance matrix
nYes=5; nNo=5; nTotal=nYes+nNo
pooledCovar=nYes/nTotal*covarYes+nNo/nTotal*covarNo

##compute inverse of pooled covariance matrix
invCovar=solve(pooledCovar)

##compute beta coefficients
beta=invCovar%*%(meanDiff); beta

##              [,1]
## Durability  1.359
## Performance 1.026

##compute Mahalanobis distance MD
##MD gives the number of standard deviations by which the group means differ
##The larger the MD the better is the discrimination

MD2= t(beta)%*%meanDiff
MD=sqrt(MD2); MD

##       [,1]
## [1,] 2.858

##Using the model to make predictions
cutoff=log((nYes/nTotal)/(nNo/nTotal))
z0=t(beta)%*%meanAvg

##Suppose the new value is Durability=2 and Performance=1.
##Will the purchase decision be Yes or No?
z=t(beta)%*%c(2,1)
decision=(z-z0)>cutoff

if(decision){ print("This guy is going to purchase this product.")
    } else print("This guy is NOT going to purchase this product.")

## [1] "This guy is NOT going to purchase this product."

##let us draw a perspective diagram to visualize the LDA algorith.
beta1= beta[1]; beta2=beta[2]

f=function(x,y){beta1*x+beta2*y}
z=outer(x,y,f)
par(cex=0.8) ##reduce the font size in axis label
res=persp(x, y, z, theta = 30, phi = 40, expand = 0.8, col = "lightgreen",
      ltheta = 120, shade = 0.75, ticktype = "detailed", 
      xlab = "X", ylab = "Y", zlab = "z-score") 


##points for purchase=Y
zYes=f(dYes[,1],dYes[,2])
points(trans3d(dYes[,1],dYes[,2],zYes, pmat = res), col = "blue", pch = 16)

##projections on z-axis for purchase=Y
pYZ=cbind(rep(0,5),rep(0,5))
points(trans3d(pYZ[,1],pYZ[,2],zYes, pmat = res), col = "blue", pch = 4)

##points for purchase=N
zNo=f(dNo[,1],dNo[,2])
points(trans3d(dNo[,1],dNo[,2],zNo, pmat = res), col = "blue", pch = 16)
points(trans3d(dNo[,1],dNo[,2],zNo, pmat = res), col = "red", pch = 16)

##projections on z-axis for purchase=N
pNZ=cbind(rep(0,5),rep(0,5))
points(trans3d(pNZ[,1],pNZ[,2],zNo, pmat = res), col = "red", pch = 4)

plot of chunk unnamed-chunk-3