This example was taken from a German blog post.
Grouping variable group, A and B. A: first class automobile B: middle class models. c: Low Class models.
15 consumers were asked to rank the models according to security, ecology, innovation and prestige.
Likert-scale: 1 (low ranking) to 10 (high ranking).
df <- read.csv2("C:/Users/anilk/OneDrive - SRH IT/Analytics-2/Input_data/lda_ex.csv")
head(df)
## group security ecology innovation prestige
## 1 A 8 6 8 10
## 2 A 7 7 8 9
## 3 A 8 6 9 9
## 4 A 9 5 7 9
## 5 A 10 8 9 10
## 6 A 7 5 6 8
str(df)
## 'data.frame': 45 obs. of 5 variables:
## $ group : chr "A" "A" "A" "A" ...
## $ security : int 8 7 8 9 10 7 9 8 7 8 ...
## $ ecology : int 6 7 6 5 8 5 7 7 6 7 ...
## $ innovation: int 8 8 9 7 9 6 10 9 7 8 ...
## $ prestige : int 10 9 9 9 10 8 9 9 9 8 ...
df <- subset(df , group != "C")
df$group <- factor(df$group)
str(df)
## 'data.frame': 30 obs. of 5 variables:
## $ group : Factor w/ 2 levels "A","B": 1 1 1 1 1 1 1 1 1 1 ...
## $ security : int 8 7 8 9 10 7 9 8 7 8 ...
## $ ecology : int 6 7 6 5 8 5 7 7 6 7 ...
## $ innovation: int 8 8 9 7 9 6 10 9 7 8 ...
## $ prestige : int 10 9 9 9 10 8 9 9 9 8 ...
#df <- droplevels(df)
#str(df)
summary(df)
## group security ecology innovation prestige
## A:15 Min. : 7.000 Min. : 5.000 Min. : 4.000 Min. : 5.000
## B:15 1st Qu.: 8.000 1st Qu.: 6.250 1st Qu.: 5.000 1st Qu.: 6.000
## Median : 8.000 Median : 7.500 Median : 6.000 Median : 6.500
## Mean : 8.433 Mean : 7.367 Mean : 6.633 Mean : 7.133
## 3rd Qu.: 9.000 3rd Qu.: 8.000 3rd Qu.: 8.000 3rd Qu.: 9.000
## Max. :10.000 Max. :10.000 Max. :10.000 Max. :10.000
plot_box <- function(attr){
colname <- eval(parse(text=paste('df$' ,attr , sep = '' )))
boxplot(colname , horizontal = TRUE)
remove(colname)
}
plot_hist <- function(attr){
colname <- eval(parse(text=paste('df$' ,attr , sep = '' )))
sub_text <- paste('Mean :' , round (mean(colname) , 2) ,
'\nMedian:', round (median(colname) , 2) ,
'\nStd. Dev:', round (sd(colname) , 2)
)
hist(colname , main = attr , labels = TRUE , xlab = '' , sub=sub_text )
}
par(mfrow=c(4,2))
plot_box('security')
plot_box('ecology')
plot_hist('security')
plot_hist('ecology')
plot_box('innovation')
plot_box('prestige')
plot_hist('innovation')
plot_hist('prestige')
remove(plot_box , plot_hist)
require(psych)
## Loading required package: psych
pairs.panels(df[2:5],
gap = 0,
bg = c('red' , 'green' , 'blue' , 'black')[df$group],
pch=21)
par(mfrow=c(2,2))
boxplot(df$security ~ df$group , xlab = 'GROUP' , ylab = 'SECURITY' , main = "SECURITY")
boxplot(df$ecology ~ df$group , xlab = 'GROUP' , ylab = 'ECOLOGY' , main = "ECOLOGY")
boxplot(df$innovation ~ df$group , xlab = 'GROUP' , ylab = 'INNOVAITON' , main = "INNOVAITON")
boxplot(df$prestige ~ df$group , xlab = 'GROUP' , ylab = 'PRESTIGE' , main = "PRESTIGE")
## Fitting LDA :-
require(MASS)
## Loading required package: MASS
fit <- lda(group ~ . , data=df)
fit
## Call:
## lda(group ~ ., data = df)
##
## Prior probabilities of groups:
## A B
## 0.5 0.5
##
## Group means:
## security ecology innovation prestige
## A 8.400000 6.533333 7.933333 8.666667
## B 8.466667 8.200000 5.333333 5.600000
##
## Coefficients of linear discriminants:
## LD1
## security 0.02473145
## ecology 0.48227799
## innovation -0.30381323
## prestige -0.90305578
plot(fit , type="both")
p <- predict(fit)
head(p)
## $class
## [1] A A A A A A A A A A A B A A A B B B B B B B B B B B B B B B
## Levels: A B
##
## $posterior
## A B
## 1 9.999999e-01 1.086477e-07
## 2 9.999588e-01 4.122222e-05
## 3 9.999985e-01 1.485701e-06
## 4 9.999971e-01 2.860349e-06
## 5 9.999976e-01 2.411406e-06
## 6 9.995531e-01 4.468691e-04
## 7 9.999964e-01 3.606657e-06
## 8 9.999878e-01 1.219334e-05
## 9 9.999811e-01 1.891677e-05
## 10 9.976405e-01 2.359483e-03
## 11 9.999907e-01 9.322778e-06
## 12 3.262928e-04 9.996737e-01
## 13 9.999931e-01 6.943712e-06
## 14 9.982416e-01 1.758425e-03
## 15 9.993724e-01 6.275789e-04
## 16 3.239822e-06 9.999968e-01
## 17 8.602414e-07 9.999991e-01
## 18 3.262928e-04 9.996737e-01
## 19 8.819030e-04 9.991181e-01
## 20 1.367531e-03 9.986325e-01
## 21 3.239822e-06 9.999968e-01
## 22 1.381097e-05 9.999862e-01
## 23 4.267193e-04 9.995733e-01
## 24 5.689226e-06 9.999943e-01
## 25 1.227771e-03 9.987722e-01
## 26 1.668258e-04 9.998332e-01
## 27 1.668258e-04 9.998332e-01
## 28 9.389979e-04 9.990610e-01
## 29 2.929150e-04 9.997071e-01
## 30 7.864905e-06 9.999921e-01
##
## $x
## LD1
## 1 -3.673802
## 2 -2.313199
## 3 -3.074559
## 4 -2.924479
## 5 -2.963596
## 6 -1.767073
## 7 -2.871363
## 8 -2.592281
## 9 -2.491664
## 10 -1.385412
## 11 -2.653781
## 12 1.839149
## 13 -2.721283
## 14 -1.452914
## 15 -1.689225
## 16 2.895938
## 17 3.199751
## 18 1.839149
## 19 1.611221
## 20 1.510604
## 21 2.895938
## 22 2.563740
## 23 1.777649
## 24 2.766936
## 25 1.535335
## 26 1.992882
## 27 1.992882
## 28 1.596836
## 29 1.863880
## 30 2.692742
#require(devtools)
#install_github("fawda123/ggord")
#require(ggord)
#ggord(fit , df$group)
require(klaR)
## Loading required package: klaR
partimat(group~. , data = df , method = "lda")
df$Predicted_group <- p$class
df$prob_A <- p$posterior[,1]
df$prob_B <- p$posterior[,2]
df$descriment_values <- p$x[,1]
remove(p)
head(df)
## group security ecology innovation prestige Predicted_group prob_A
## 1 A 8 6 8 10 A 0.9999999
## 2 A 7 7 8 9 A 0.9999588
## 3 A 8 6 9 9 A 0.9999985
## 4 A 9 5 7 9 A 0.9999971
## 5 A 10 8 9 10 A 0.9999976
## 6 A 7 5 6 8 A 0.9995531
## prob_B descriment_values
## 1 1.086477e-07 -3.673802
## 2 4.122222e-05 -2.313199
## 3 1.485701e-06 -3.074559
## 4 2.860349e-06 -2.924479
## 5 2.411406e-06 -2.963596
## 6 4.468691e-04 -1.767073
f <- table(df$group , df$Predicted_group)
f
##
## A B
## A 14 1
## B 0 15
round ( (sum(diag(f)) / sum(f))*100 , 2 )
## [1] 96.67
remove(f)
nd <- data.frame(security=c(6, 5), ecology=c(8, 5),
innovation=c(7, 6), prestige=c(8, 5))
predict(fit , newdata = nd)
## $class
## [1] A B
## Levels: A B
##
## $posterior
## A B
## 1 0.94436846 0.05563154
## 2 0.01991636 0.98008364
##
## $x
## LD1
## 1 -0.6487837
## 2 0.8926315