Introduction to LDA
##input data
dYes=data.frame(Durability=c(8,6,10,9,4),Performance=c(9,7,6,4,8),Decision=rep("y",5))
dNo=data.frame(Durability=c(5,3,4,2,2),Performance=c(4,7,5,4,2),Decision=rep("n",5))
survey=merge(dYes,dNo,all=TRUE)
##run the lda algorithm
library(MASS)
lda.result=lda(Decision ~.,survey); lda.result
## Call:
## lda(Decision ~ ., data = survey)
##
## Prior probabilities of groups:
## y n
## 0.5 0.5
##
## Group means:
## Durability Performance
## y 7.4 6.8
## n 3.2 4.4
##
## Coefficients of linear discriminants:
## LD1
## Durability -0.4755
## Performance -0.3588
plot(lda.result)
x=seq(0,12,by=0.2)
y=seq(0,12,by=0.2)
##retreive the values of beta coefficients
beta1=lda.result$scaling[1]
beta2=lda.result$scaling[2]
##display the values of lda coefficients
beta1; beta2
## [1] -0.4755
## [1] -0.3588
########################
Let us manually implement the LDA algorithm.
##Manually implementing the LDA algorithm
##compute mean of each predictor for each class
meanDurYes=with(survey, mean(Durability[Decision=="y"]))
meanPerYes=with(survey, mean(Performance[Decision=="y"]))
meanDurNo=with(survey, mean(Durability[Decision=="n"]))
meanPerNo=with(survey, mean(Performance[Decision=="n"]))
##create a vector of the difference/average of averaged means across the groups
meanDiff=c(meanDurYes-meanDurNo, meanPerYes-meanPerNo)
meanAvg=c((meanDurYes+meanDurNo)/2, (meanPerYes+meanPerNo)/2)
##compute covariance matrix of all predictors for each class
covarYes=with(survey, cov(survey[Decision=="y",1:2]))
covarNo=with(survey, cov(survey[Decision=="n",1:2]))
##compute pooled covariance matrix
nYes=5; nNo=5; nTotal=nYes+nNo
pooledCovar=nYes/nTotal*covarYes+nNo/nTotal*covarNo
##compute inverse of pooled covariance matrix
invCovar=solve(pooledCovar)
##compute beta coefficients
beta=invCovar%*%(meanDiff); beta
## [,1]
## Durability 1.359
## Performance 1.026
##compute Mahalanobis distance MD
##MD gives the number of standard deviations by which the group means differ
##The larger the MD the better is the discrimination
MD2= t(beta)%*%meanDiff
MD=sqrt(MD2); MD
## [,1]
## [1,] 2.858
##Using the model to make predictions
cutoff=log((nYes/nTotal)/(nNo/nTotal))
z0=t(beta)%*%meanAvg
##Suppose the new value is Durability=2 and Performance=1.
##Will the purchase decision be Yes or No?
z=t(beta)%*%c(2,1)
decision=(z-z0)>cutoff
if(decision){ print("This guy is going to purchase this product.")
} else print("This guy is NOT going to purchase this product.")
## [1] "This guy is NOT going to purchase this product."
##let us draw a perspective diagram to visualize the LDA algorith.
beta1= beta[1]; beta2=beta[2]
f=function(x,y){beta1*x+beta2*y}
z=outer(x,y,f)
par(cex=0.8) ##reduce the font size in axis label
res=persp(x, y, z, theta = 30, phi = 40, expand = 0.8, col = "lightgreen",
ltheta = 120, shade = 0.75, ticktype = "detailed",
xlab = "X", ylab = "Y", zlab = "z-score")
##points for purchase=Y
zYes=f(dYes[,1],dYes[,2])
points(trans3d(dYes[,1],dYes[,2],zYes, pmat = res), col = "blue", pch = 16)
##projections on z-axis for purchase=Y
pYZ=cbind(rep(0,5),rep(0,5))
points(trans3d(pYZ[,1],pYZ[,2],zYes, pmat = res), col = "blue", pch = 4)
##points for purchase=N
zNo=f(dNo[,1],dNo[,2])
points(trans3d(dNo[,1],dNo[,2],zNo, pmat = res), col = "blue", pch = 16)
points(trans3d(dNo[,1],dNo[,2],zNo, pmat = res), col = "red", pch = 16)
##projections on z-axis for purchase=N
pNZ=cbind(rep(0,5),rep(0,5))
points(trans3d(pNZ[,1],pNZ[,2],zNo, pmat = res), col = "red", pch = 4)