第6章 判别分析
6.2 线性判别分析
6.2.2 Fisher 线性判别求解
library(readxl)
d6.1 = read_excel("C:/Users/12290/Desktop/多元统计表格/mvstats5.xlsx", "d6.1")
d6.1
## # A tibble: 20 × 3
## G x1 x2
## <dbl> <dbl> <dbl>
## 1 1 -1.9 3.2
## 2 1 -6.9 0.4
## 3 1 5.2 2
## 4 1 5 2.5
## 5 1 7.3 0
## 6 1 6.8 12.7
## 7 1 0.9 -5.4
## 8 1 -12.5 -2.5
## 9 1 1.5 1.3
## 10 1 3.8 6.8
## 11 2 0.2 6.2
## 12 2 -0.1 7.5
## 13 2 0.4 14.6
## 14 2 2.7 8.3
## 15 2 2.1 0.8
## 16 2 -4.6 4.3
## 17 2 -1.7 10.9
## 18 2 -2.6 13.1
## 19 2 2.6 12.8
## 20 2 -2.8 10
attach(d6.1)
par(mar = c(3, 4, 1, 1), mfrow = c(1, 2))
boxplot(x1 ~ G, d6.1, ylab = "湿温差(x1)")
boxplot(x2 ~ G, d6.1, ylab = "气温差(x2)")

par(mar = c(3, 4, 1, 1), mfrow = c(1, 1))
plot(x1, x2)
text(x1, x2, G, adj = -0.5)
library(MASS)
ld = lda(G ~ x1 + x2, data = d6.1)
ld
## Call:
## lda(G ~ x1 + x2, data = d6.1)
##
## Prior probabilities of groups:
## 1 2
## 0.5 0.5
##
## Group means:
## x1 x2
## 1 0.92 2.10
## 2 -0.38 8.85
##
## Coefficients of linear discriminants:
## LD1
## x1 -0.1035305
## x2 0.2247957
Z = predict(ld)
newG = Z$class
cbind(G, Z$x, newG)
## G LD1 newG
## 1 1 -0.28674901 1
## 2 1 -0.39852439 1
## 3 1 -1.29157053 1
## 4 1 -1.15846657 1
## 5 1 -1.95857603 1
## 6 1 0.94809469 2
## 7 1 -2.50987753 1
## 8 1 -0.47066104 1
## 9 1 -1.06586461 1
## 10 1 -0.06760842 1
## 11 2 0.17022402 2
## 12 2 0.49351760 2
## 13 2 2.03780185 2
## 14 2 0.38346871 2
## 15 2 -1.24038077 1
## 16 2 0.24005867 2
## 17 2 1.42347182 2
## 18 2 2.01119984 2
## 19 2 1.40540244 2
## 20 2 1.33503926 2
(tab = table(G, newG))
## newG
## G 1 2
## 1 9 1
## 2 1 9
sum(diag(prop.table(tab)))
## [1] 0.9
text(x1, x2, newG, adj = -0.5)

plot(ld, type = "both")

6.3 距离判别法
d6.3 = read_excel("C:/Users/12290/Desktop/多元统计表格/mvstats5.xlsx", "d6.3")
d6.3
## # A tibble: 20 × 4
## Q C P G3
## <dbl> <dbl> <dbl> <dbl>
## 1 8.3 4 29 1
## 2 9.5 7 68 1
## 3 8 5 39 1
## 4 7.4 7 50 1
## 5 8.8 6.5 55 1
## 6 9 7.5 58 2
## 7 7 6 75 2
## 8 9.2 8 82 2
## 9 8 7 67 2
## 10 7.6 9 90 2
## 11 7.2 8.5 86 2
## 12 6.4 7 53 2
## 13 7.3 5 48 2
## 14 6 2 20 3
## 15 6.4 4 39 3
## 16 6.8 5 48 3
## 17 5.2 3 29 3
## 18 5.8 3.5 32 3
## 19 5.5 4 34 3
## 20 6 4.5 36 3
rm(C, P)
## Warning in rm(C, P): 找不到对象'C'
## Warning in rm(C, P): 找不到对象'P'
attach(d6.3)
par(mar = c(4, 4, 2, 1), cex = 0.75)
plot(Q, C)
text(Q, C, G3, adj = -0.5)

plot(Q, P)
text(Q, P, G3, adj = -0.5)

plot(C, P)
text(C, P, G3, adj = -0.5)

library(MASS)
qd = qda(G3 ~ Q + C + P)
qd
## Call:
## qda(G3 ~ Q + C + P)
##
## Prior probabilities of groups:
## 1 2 3
## 0.25 0.40 0.35
##
## Group means:
## Q C P
## 1 8.400000 5.900000 48.200
## 2 7.712500 7.250000 69.875
## 3 5.957143 3.714286 34.000
qnewG = predict(qd)$class
qtab = table(G3, qnewG)
diag(prop.table(qtab, 1))
## 1 2 3
## 1.000 0.875 1.000
sum(diag(prop.table(qtab)))
## [1] 0.95
predict(qd, data.frame(Q = 8, C = 7.5, P = 65))
## $class
## [1] 2
## Levels: 1 2 3
##
## $posterior
## 1 2 3
## 1 0.008221165 0.9915392 0.0002396287
ld = lda(G3 ~ Q + C + P)
lnewG = predict(ld)$class
cbind(G3, Wx = predict(ld)$x, lnewG)
## G3 LD1 LD2 lnewG
## 1 1 -0.1409984 2.582951755 1
## 2 1 -2.3918356 0.825366275 1
## 3 1 -0.3704452 1.641514840 1
## 4 1 -0.9714835 0.548448277 1
## 5 1 -1.7134891 1.246681993 1
## 6 2 -2.4593598 1.361571174 1
## 7 2 0.3789617 -2.200431689 2
## 8 2 -2.5581070 -0.467096091 2
## 9 2 -1.1900285 -0.412972027 2
## 10 2 -1.7638874 -2.382302324 2
## 11 2 -1.1869165 -2.485574940 2
## 12 2 -0.1123680 -0.598883922 2
## 13 2 0.3399132 0.232863397 3
## 14 3 2.8456561 0.936722573 3
## 15 3 1.5592346 0.025668216 3
## 16 3 0.7457802 -0.209168159 3
## 17 3 3.0062824 -0.358989534 3
## 18 3 2.2511708 0.008852067 3
## 19 3 2.2108260 -0.331206768 3
## 20 3 1.5210939 0.035984885 3
ltab = table(G3, lnewG)
diag(prop.table(ltab, 1))
## 1 2 3
## 1.00 0.75 1.00
sum(diag(prop.table(ltab)))
## [1] 0.9
predict(ld, data.frame(Q = 8, C = 7.5, P = 65))
## $class
## [1] 2
## Levels: 1 2 3
##
## $posterior
## 1 2 3
## 1 0.2114514 0.786773 0.001775594
##
## $x
## LD1 LD2
## 1 -1.537069 -0.1367865
6.4 Bayes 判别法
(ld1 = lda(G3 ~ Q + C + P, prior = c(1, 1, 1)/3))
## Call:
## lda(G3 ~ Q + C + P, prior = c(1, 1, 1)/3)
##
## Prior probabilities of groups:
## 1 2 3
## 0.3333333 0.3333333 0.3333333
##
## Group means:
## Q C P
## 1 8.400000 5.900000 48.200
## 2 7.712500 7.250000 69.875
## 3 5.957143 3.714286 34.000
##
## Coefficients of linear discriminants:
## LD1 LD2
## Q -0.92307369 0.76708185
## C -0.65222524 0.11482179
## P 0.02743244 -0.08484154
##
## Proportion of trace:
## LD1 LD2
## 0.7259 0.2741
(ld2 = lda(G3 ~ Q + C + P, prior = c(5, 8, 7)/20))
## Call:
## lda(G3 ~ Q + C + P, prior = c(5, 8, 7)/20)
##
## Prior probabilities of groups:
## 1 2 3
## 0.25 0.40 0.35
##
## Group means:
## Q C P
## 1 8.400000 5.900000 48.200
## 2 7.712500 7.250000 69.875
## 3 5.957143 3.714286 34.000
##
## Coefficients of linear discriminants:
## LD1 LD2
## Q -0.81173396 0.88406311
## C -0.63090549 0.20134565
## P 0.01579385 -0.08775636
##
## Proportion of trace:
## LD1 LD2
## 0.7403 0.2597
Z1 = predict(ld1)
cbind(G3, round(Z1$x, 3), newG = Z1$class)
## G3 LD1 LD2 newG
## 1 1 -0.408 2.378 1
## 2 1 -2.403 0.334 1
## 3 1 -0.509 1.414 1
## 4 1 -0.958 0.250 1
## 5 1 -1.787 0.843 1
## 6 2 -2.542 0.856 1
## 7 2 0.749 -2.292 2
## 8 2 -2.394 -0.969 2
## 9 2 -1.046 -0.732 2
## 10 2 -1.350 -2.760 2
## 11 2 -0.764 -2.785 2
## 12 2 0.047 -0.771 2
## 13 2 0.384 0.114 3
## 14 3 2.772 1.148 3
## 15 3 1.620 0.072 3
## 16 3 0.845 -0.270 3
## 17 3 3.105 -0.115 3
## 18 3 2.308 0.148 3
## 19 3 2.313 -0.194 3
## 20 3 1.581 0.077 3
Z2 = predict(ld2)
cbind(G3, round(Z2$x, 3), newG = Z2$class)
## G3 LD1 LD2 newG
## 1 1 -0.141 2.583 1
## 2 1 -2.392 0.825 1
## 3 1 -0.370 1.642 1
## 4 1 -0.971 0.548 1
## 5 1 -1.713 1.247 1
## 6 2 -2.459 1.362 1
## 7 2 0.379 -2.200 2
## 8 2 -2.558 -0.467 2
## 9 2 -1.190 -0.413 2
## 10 2 -1.764 -2.382 2
## 11 2 -1.187 -2.486 2
## 12 2 -0.112 -0.599 2
## 13 2 0.340 0.233 3
## 14 3 2.846 0.937 3
## 15 3 1.559 0.026 3
## 16 3 0.746 -0.209 3
## 17 3 3.006 -0.359 3
## 18 3 2.251 0.009 3
## 19 3 2.211 -0.331 3
## 20 3 1.521 0.036 3
table(G3, Z1$class)
##
## G3 1 2 3
## 1 5 0 0
## 2 1 6 1
## 3 0 0 7
table(G3, Z2$class)
##
## G3 1 2 3
## 1 5 0 0
## 2 1 6 1
## 3 0 0 7
round(Z1$post, 3)
## 1 2 3
## 1 0.983 0.006 0.012
## 2 0.794 0.206 0.000
## 3 0.937 0.043 0.020
## 4 0.654 0.337 0.009
## 5 0.905 0.094 0.000
## 6 0.928 0.072 0.000
## 7 0.003 0.863 0.133
## 8 0.177 0.822 0.000
## 9 0.185 0.811 0.005
## 10 0.003 0.997 0.000
## 11 0.002 0.997 0.001
## 12 0.111 0.780 0.109
## 13 0.292 0.325 0.383
## 14 0.001 0.000 0.999
## 15 0.012 0.023 0.965
## 16 0.079 0.243 0.678
## 17 0.000 0.000 1.000
## 18 0.001 0.003 0.996
## 19 0.001 0.004 0.995
## 20 0.014 0.025 0.961
round(Z2$post, 3)
## 1 2 3
## 1 0.975 0.009 0.016
## 2 0.707 0.293 0.000
## 3 0.907 0.067 0.027
## 4 0.542 0.447 0.011
## 5 0.857 0.143 0.001
## 6 0.889 0.111 0.000
## 7 0.002 0.879 0.119
## 8 0.119 0.881 0.000
## 9 0.124 0.871 0.004
## 10 0.002 0.998 0.000
## 11 0.001 0.998 0.001
## 12 0.074 0.825 0.101
## 13 0.216 0.386 0.398
## 14 0.001 0.000 0.999
## 15 0.009 0.026 0.965
## 16 0.056 0.274 0.670
## 17 0.000 0.000 1.000
## 18 0.001 0.003 0.996
## 19 0.001 0.005 0.994
## 20 0.010 0.029 0.961
predict(ld1, data.frame(Q = 8, C = 7.5, P = 65))
## $class
## [1] 2
## Levels: 1 2 3
##
## $posterior
## 1 2 3
## 1 0.300164 0.6980356 0.001800378
##
## $x
## LD1 LD2
## 1 -1.426693 -0.5046594
predict(ld2, data.frame(Q = 8, C = 7.5, P = 65))
## $class
## [1] 2
## Levels: 1 2 3
##
## $posterior
## 1 2 3
## 1 0.2114514 0.786773 0.001775594
##
## $x
## LD1 LD2
## 1 -1.537069 -0.1367865
案例6 企业财务状况的判别分析
Case6 = read_excel("C:/Users/12290/Desktop/多元统计表格/mvcase5.xlsx", "Case6")
Case6
## # A tibble: 46 × 5
## G CF_TD NI_TA CA_CL CA_NS
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.51 0.1 2.49 0.54
## 2 1 0.08 0.02 2.01 0.53
## 3 1 0.38 0.11 3.27 0.35
## 4 1 0.19 0.05 2.25 0.33
## 5 1 0.32 0.07 4.24 0.63
## 6 1 0.31 0.05 4.45 0.69
## 7 1 0.12 0.05 2.52 0.69
## 8 1 -0.02 0.02 2.05 0.35
## 9 1 0.22 0.08 2.35 0.4
## 10 1 0.17 0.07 1.8 0.52
## # … with 36 more rows
plot(Case6[, 2:5], gap = 0)

library(MASS)
ld = lda(G ~ ., data = Case6)
plot(ld)

Zld = predict(ld)
data.frame(Case6$G, Zld$class, round(Zld$x, 3))
## Case6.G Zld.class LD1
## 1 1 1 -1.013
## 2 1 1 0.028
## 3 1 1 -1.895
## 4 1 1 -0.625
## 5 1 1 -2.210
## 6 1 1 -2.230
## 7 1 1 -0.395
## 8 1 1 -0.158
## 9 1 1 -0.783
## 10 1 1 -0.076
## 11 1 1 -0.268
## 12 1 1 -0.102
## 13 1 2 1.271
## 14 1 1 -0.778
## 15 1 1 -0.354
## 16 1 1 -0.813
## 17 1 1 -0.308
## 18 1 1 -1.005
## 19 1 1 -0.185
## 20 1 1 -0.265
## 21 1 1 -2.808
## 22 1 1 -0.569
## 23 1 1 -1.655
## 24 1 1 -0.971
## 25 1 1 -3.562
## 26 2 2 2.997
## 27 2 2 1.904
## 28 2 2 0.776
## 29 2 2 0.790
## 30 2 2 1.196
## 31 2 2 1.426
## 32 2 2 0.764
## 33 2 2 0.887
## 34 2 2 0.512
## 35 2 2 1.278
## 36 2 2 2.725
## 37 2 2 0.325
## 38 2 2 0.238
## 39 2 2 2.249
## 40 2 1 -0.342
## 41 2 1 -0.715
## 42 2 2 0.888
## 43 2 2 0.793
## 44 2 2 0.911
## 45 2 1 -0.050
## 46 2 2 2.178
tab1 = table(Case6$G, Zld$class)
tab1
##
## 1 2
## 1 24 1
## 2 3 18
sum(diag(tab1))/sum(tab1)
## [1] 0.9130435
qd = qda(G ~ ., data = Case6)
qd
## Call:
## qda(G ~ ., data = Case6)
##
## Prior probabilities of groups:
## 1 2
## 0.5434783 0.4565217
##
## Group means:
## CF_TD NI_TA CA_CL CA_NS
## 1 0.23520000 0.05560000 2.593600 0.426800
## 2 -0.06809524 -0.08142857 1.366667 0.437619
Zqd = predict(qd)
tab2 = table(Case6$G, Zqd$class)
tab2
##
## 1 2
## 1 24 1
## 2 2 19
sum(diag(tab2))/sum(tab2)
## [1] 0.9347826
addmargins(tab2)
##
## 1 2 Sum
## 1 24 1 25
## 2 2 19 21
## Sum 26 20 46