第6章 判别分析

6.2 线性判别分析

6.2.2 Fisher 线性判别求解

library(readxl)
d6.1 = read_excel("C:/Users/12290/Desktop/多元统计表格/mvstats5.xlsx", "d6.1")
d6.1
## # A tibble: 20 × 3
##        G    x1    x2
##    <dbl> <dbl> <dbl>
##  1     1  -1.9   3.2
##  2     1  -6.9   0.4
##  3     1   5.2   2  
##  4     1   5     2.5
##  5     1   7.3   0  
##  6     1   6.8  12.7
##  7     1   0.9  -5.4
##  8     1 -12.5  -2.5
##  9     1   1.5   1.3
## 10     1   3.8   6.8
## 11     2   0.2   6.2
## 12     2  -0.1   7.5
## 13     2   0.4  14.6
## 14     2   2.7   8.3
## 15     2   2.1   0.8
## 16     2  -4.6   4.3
## 17     2  -1.7  10.9
## 18     2  -2.6  13.1
## 19     2   2.6  12.8
## 20     2  -2.8  10
attach(d6.1)
par(mar = c(3, 4, 1, 1), mfrow = c(1, 2))
boxplot(x1 ~ G, d6.1, ylab = "湿温差(x1)")
boxplot(x2 ~ G, d6.1, ylab = "气温差(x2)")

par(mar = c(3, 4, 1, 1), mfrow = c(1, 1))
plot(x1, x2)
text(x1, x2, G, adj = -0.5)
library(MASS)
ld = lda(G ~ x1 + x2, data = d6.1)
ld
## Call:
## lda(G ~ x1 + x2, data = d6.1)
## 
## Prior probabilities of groups:
##   1   2 
## 0.5 0.5 
## 
## Group means:
##      x1   x2
## 1  0.92 2.10
## 2 -0.38 8.85
## 
## Coefficients of linear discriminants:
##           LD1
## x1 -0.1035305
## x2  0.2247957
Z = predict(ld)
newG = Z$class
cbind(G, Z$x, newG)
##    G         LD1 newG
## 1  1 -0.28674901    1
## 2  1 -0.39852439    1
## 3  1 -1.29157053    1
## 4  1 -1.15846657    1
## 5  1 -1.95857603    1
## 6  1  0.94809469    2
## 7  1 -2.50987753    1
## 8  1 -0.47066104    1
## 9  1 -1.06586461    1
## 10 1 -0.06760842    1
## 11 2  0.17022402    2
## 12 2  0.49351760    2
## 13 2  2.03780185    2
## 14 2  0.38346871    2
## 15 2 -1.24038077    1
## 16 2  0.24005867    2
## 17 2  1.42347182    2
## 18 2  2.01119984    2
## 19 2  1.40540244    2
## 20 2  1.33503926    2
(tab = table(G, newG))
##    newG
## G   1 2
##   1 9 1
##   2 1 9
sum(diag(prop.table(tab)))
## [1] 0.9
text(x1, x2, newG, adj = -0.5)

plot(ld, type = "both")

6.3 距离判别法

d6.3 = read_excel("C:/Users/12290/Desktop/多元统计表格/mvstats5.xlsx", "d6.3")
d6.3
## # A tibble: 20 × 4
##        Q     C     P    G3
##    <dbl> <dbl> <dbl> <dbl>
##  1   8.3   4      29     1
##  2   9.5   7      68     1
##  3   8     5      39     1
##  4   7.4   7      50     1
##  5   8.8   6.5    55     1
##  6   9     7.5    58     2
##  7   7     6      75     2
##  8   9.2   8      82     2
##  9   8     7      67     2
## 10   7.6   9      90     2
## 11   7.2   8.5    86     2
## 12   6.4   7      53     2
## 13   7.3   5      48     2
## 14   6     2      20     3
## 15   6.4   4      39     3
## 16   6.8   5      48     3
## 17   5.2   3      29     3
## 18   5.8   3.5    32     3
## 19   5.5   4      34     3
## 20   6     4.5    36     3
rm(C, P)
## Warning in rm(C, P): 找不到对象'C'
## Warning in rm(C, P): 找不到对象'P'
attach(d6.3)
par(mar = c(4, 4, 2, 1), cex = 0.75)
plot(Q, C)
text(Q, C, G3, adj = -0.5)

plot(Q, P)
text(Q, P, G3, adj = -0.5)

plot(C, P)
text(C, P, G3, adj = -0.5)

library(MASS)
qd = qda(G3 ~ Q + C + P)
qd 
## Call:
## qda(G3 ~ Q + C + P)
## 
## Prior probabilities of groups:
##    1    2    3 
## 0.25 0.40 0.35 
## 
## Group means:
##          Q        C      P
## 1 8.400000 5.900000 48.200
## 2 7.712500 7.250000 69.875
## 3 5.957143 3.714286 34.000
qnewG = predict(qd)$class
qtab = table(G3, qnewG)
diag(prop.table(qtab, 1))
##     1     2     3 
## 1.000 0.875 1.000
sum(diag(prop.table(qtab)))
## [1] 0.95
predict(qd, data.frame(Q = 8, C = 7.5, P = 65))
## $class
## [1] 2
## Levels: 1 2 3
## 
## $posterior
##             1         2            3
## 1 0.008221165 0.9915392 0.0002396287
ld = lda(G3 ~ Q + C + P)
lnewG = predict(ld)$class
cbind(G3, Wx = predict(ld)$x, lnewG)
##    G3        LD1          LD2 lnewG
## 1   1 -0.1409984  2.582951755     1
## 2   1 -2.3918356  0.825366275     1
## 3   1 -0.3704452  1.641514840     1
## 4   1 -0.9714835  0.548448277     1
## 5   1 -1.7134891  1.246681993     1
## 6   2 -2.4593598  1.361571174     1
## 7   2  0.3789617 -2.200431689     2
## 8   2 -2.5581070 -0.467096091     2
## 9   2 -1.1900285 -0.412972027     2
## 10  2 -1.7638874 -2.382302324     2
## 11  2 -1.1869165 -2.485574940     2
## 12  2 -0.1123680 -0.598883922     2
## 13  2  0.3399132  0.232863397     3
## 14  3  2.8456561  0.936722573     3
## 15  3  1.5592346  0.025668216     3
## 16  3  0.7457802 -0.209168159     3
## 17  3  3.0062824 -0.358989534     3
## 18  3  2.2511708  0.008852067     3
## 19  3  2.2108260 -0.331206768     3
## 20  3  1.5210939  0.035984885     3
ltab = table(G3, lnewG)
diag(prop.table(ltab, 1))
##    1    2    3 
## 1.00 0.75 1.00
sum(diag(prop.table(ltab)))
## [1] 0.9
predict(ld, data.frame(Q = 8, C = 7.5, P = 65))
## $class
## [1] 2
## Levels: 1 2 3
## 
## $posterior
##           1        2           3
## 1 0.2114514 0.786773 0.001775594
## 
## $x
##         LD1        LD2
## 1 -1.537069 -0.1367865

6.4 Bayes 判别法

(ld1 = lda(G3 ~ Q + C + P, prior = c(1, 1, 1)/3))
## Call:
## lda(G3 ~ Q + C + P, prior = c(1, 1, 1)/3)
## 
## Prior probabilities of groups:
##         1         2         3 
## 0.3333333 0.3333333 0.3333333 
## 
## Group means:
##          Q        C      P
## 1 8.400000 5.900000 48.200
## 2 7.712500 7.250000 69.875
## 3 5.957143 3.714286 34.000
## 
## Coefficients of linear discriminants:
##           LD1         LD2
## Q -0.92307369  0.76708185
## C -0.65222524  0.11482179
## P  0.02743244 -0.08484154
## 
## Proportion of trace:
##    LD1    LD2 
## 0.7259 0.2741
(ld2 = lda(G3 ~ Q + C + P, prior = c(5, 8, 7)/20))
## Call:
## lda(G3 ~ Q + C + P, prior = c(5, 8, 7)/20)
## 
## Prior probabilities of groups:
##    1    2    3 
## 0.25 0.40 0.35 
## 
## Group means:
##          Q        C      P
## 1 8.400000 5.900000 48.200
## 2 7.712500 7.250000 69.875
## 3 5.957143 3.714286 34.000
## 
## Coefficients of linear discriminants:
##           LD1         LD2
## Q -0.81173396  0.88406311
## C -0.63090549  0.20134565
## P  0.01579385 -0.08775636
## 
## Proportion of trace:
##    LD1    LD2 
## 0.7403 0.2597
Z1 = predict(ld1)
cbind(G3, round(Z1$x, 3), newG = Z1$class)
##    G3    LD1    LD2 newG
## 1   1 -0.408  2.378    1
## 2   1 -2.403  0.334    1
## 3   1 -0.509  1.414    1
## 4   1 -0.958  0.250    1
## 5   1 -1.787  0.843    1
## 6   2 -2.542  0.856    1
## 7   2  0.749 -2.292    2
## 8   2 -2.394 -0.969    2
## 9   2 -1.046 -0.732    2
## 10  2 -1.350 -2.760    2
## 11  2 -0.764 -2.785    2
## 12  2  0.047 -0.771    2
## 13  2  0.384  0.114    3
## 14  3  2.772  1.148    3
## 15  3  1.620  0.072    3
## 16  3  0.845 -0.270    3
## 17  3  3.105 -0.115    3
## 18  3  2.308  0.148    3
## 19  3  2.313 -0.194    3
## 20  3  1.581  0.077    3
Z2 = predict(ld2)
cbind(G3, round(Z2$x, 3), newG = Z2$class)
##    G3    LD1    LD2 newG
## 1   1 -0.141  2.583    1
## 2   1 -2.392  0.825    1
## 3   1 -0.370  1.642    1
## 4   1 -0.971  0.548    1
## 5   1 -1.713  1.247    1
## 6   2 -2.459  1.362    1
## 7   2  0.379 -2.200    2
## 8   2 -2.558 -0.467    2
## 9   2 -1.190 -0.413    2
## 10  2 -1.764 -2.382    2
## 11  2 -1.187 -2.486    2
## 12  2 -0.112 -0.599    2
## 13  2  0.340  0.233    3
## 14  3  2.846  0.937    3
## 15  3  1.559  0.026    3
## 16  3  0.746 -0.209    3
## 17  3  3.006 -0.359    3
## 18  3  2.251  0.009    3
## 19  3  2.211 -0.331    3
## 20  3  1.521  0.036    3
table(G3, Z1$class)
##    
## G3  1 2 3
##   1 5 0 0
##   2 1 6 1
##   3 0 0 7
table(G3, Z2$class)
##    
## G3  1 2 3
##   1 5 0 0
##   2 1 6 1
##   3 0 0 7
round(Z1$post, 3)
##        1     2     3
## 1  0.983 0.006 0.012
## 2  0.794 0.206 0.000
## 3  0.937 0.043 0.020
## 4  0.654 0.337 0.009
## 5  0.905 0.094 0.000
## 6  0.928 0.072 0.000
## 7  0.003 0.863 0.133
## 8  0.177 0.822 0.000
## 9  0.185 0.811 0.005
## 10 0.003 0.997 0.000
## 11 0.002 0.997 0.001
## 12 0.111 0.780 0.109
## 13 0.292 0.325 0.383
## 14 0.001 0.000 0.999
## 15 0.012 0.023 0.965
## 16 0.079 0.243 0.678
## 17 0.000 0.000 1.000
## 18 0.001 0.003 0.996
## 19 0.001 0.004 0.995
## 20 0.014 0.025 0.961
round(Z2$post, 3)
##        1     2     3
## 1  0.975 0.009 0.016
## 2  0.707 0.293 0.000
## 3  0.907 0.067 0.027
## 4  0.542 0.447 0.011
## 5  0.857 0.143 0.001
## 6  0.889 0.111 0.000
## 7  0.002 0.879 0.119
## 8  0.119 0.881 0.000
## 9  0.124 0.871 0.004
## 10 0.002 0.998 0.000
## 11 0.001 0.998 0.001
## 12 0.074 0.825 0.101
## 13 0.216 0.386 0.398
## 14 0.001 0.000 0.999
## 15 0.009 0.026 0.965
## 16 0.056 0.274 0.670
## 17 0.000 0.000 1.000
## 18 0.001 0.003 0.996
## 19 0.001 0.005 0.994
## 20 0.010 0.029 0.961
predict(ld1, data.frame(Q = 8, C = 7.5, P = 65))
## $class
## [1] 2
## Levels: 1 2 3
## 
## $posterior
##          1         2           3
## 1 0.300164 0.6980356 0.001800378
## 
## $x
##         LD1        LD2
## 1 -1.426693 -0.5046594
predict(ld2, data.frame(Q = 8, C = 7.5, P = 65))
## $class
## [1] 2
## Levels: 1 2 3
## 
## $posterior
##           1        2           3
## 1 0.2114514 0.786773 0.001775594
## 
## $x
##         LD1        LD2
## 1 -1.537069 -0.1367865

案例6 企业财务状况的判别分析

Case6 = read_excel("C:/Users/12290/Desktop/多元统计表格/mvcase5.xlsx", "Case6")
Case6
## # A tibble: 46 × 5
##        G CF_TD NI_TA CA_CL CA_NS
##    <dbl> <dbl> <dbl> <dbl> <dbl>
##  1     1  0.51  0.1   2.49  0.54
##  2     1  0.08  0.02  2.01  0.53
##  3     1  0.38  0.11  3.27  0.35
##  4     1  0.19  0.05  2.25  0.33
##  5     1  0.32  0.07  4.24  0.63
##  6     1  0.31  0.05  4.45  0.69
##  7     1  0.12  0.05  2.52  0.69
##  8     1 -0.02  0.02  2.05  0.35
##  9     1  0.22  0.08  2.35  0.4 
## 10     1  0.17  0.07  1.8   0.52
## # … with 36 more rows
plot(Case6[, 2:5], gap = 0)

library(MASS)
ld = lda(G ~ ., data = Case6)
plot(ld)

Zld = predict(ld)
data.frame(Case6$G, Zld$class, round(Zld$x, 3))
##    Case6.G Zld.class    LD1
## 1        1         1 -1.013
## 2        1         1  0.028
## 3        1         1 -1.895
## 4        1         1 -0.625
## 5        1         1 -2.210
## 6        1         1 -2.230
## 7        1         1 -0.395
## 8        1         1 -0.158
## 9        1         1 -0.783
## 10       1         1 -0.076
## 11       1         1 -0.268
## 12       1         1 -0.102
## 13       1         2  1.271
## 14       1         1 -0.778
## 15       1         1 -0.354
## 16       1         1 -0.813
## 17       1         1 -0.308
## 18       1         1 -1.005
## 19       1         1 -0.185
## 20       1         1 -0.265
## 21       1         1 -2.808
## 22       1         1 -0.569
## 23       1         1 -1.655
## 24       1         1 -0.971
## 25       1         1 -3.562
## 26       2         2  2.997
## 27       2         2  1.904
## 28       2         2  0.776
## 29       2         2  0.790
## 30       2         2  1.196
## 31       2         2  1.426
## 32       2         2  0.764
## 33       2         2  0.887
## 34       2         2  0.512
## 35       2         2  1.278
## 36       2         2  2.725
## 37       2         2  0.325
## 38       2         2  0.238
## 39       2         2  2.249
## 40       2         1 -0.342
## 41       2         1 -0.715
## 42       2         2  0.888
## 43       2         2  0.793
## 44       2         2  0.911
## 45       2         1 -0.050
## 46       2         2  2.178
tab1 = table(Case6$G, Zld$class)
tab1
##    
##      1  2
##   1 24  1
##   2  3 18
sum(diag(tab1))/sum(tab1)
## [1] 0.9130435
qd = qda(G ~ ., data = Case6)
qd
## Call:
## qda(G ~ ., data = Case6)
## 
## Prior probabilities of groups:
##         1         2 
## 0.5434783 0.4565217 
## 
## Group means:
##         CF_TD       NI_TA    CA_CL    CA_NS
## 1  0.23520000  0.05560000 2.593600 0.426800
## 2 -0.06809524 -0.08142857 1.366667 0.437619
Zqd = predict(qd)
tab2 = table(Case6$G, Zqd$class)
tab2
##    
##      1  2
##   1 24  1
##   2  2 19
sum(diag(tab2))/sum(tab2)
## [1] 0.9347826
addmargins(tab2)
##      
##        1  2 Sum
##   1   24  1  25
##   2    2 19  21
##   Sum 26 20  46