setwd("D:\\Rdownload\\lianxi\\fifth") #设定工作路径
d5.5<-read.csv("ex5.5.csv", header = T) #读入数据
d5.5 #展示数据
## 序号 x1 x2 x3 x4 x5 x6 x7 x8 G
## 1 1 1000 3000 0 0.1 0.3 0.1 0.3 4 1
## 2 2 3500 2500 0 0.5 0.5 0.5 2.0 1 1
## 3 3 1200 1000 0 0.5 0.5 1.0 0.5 3 1
## 4 4 800 800 0 0.1 1.0 5.0 1.0 3 1
## 5 5 3000 2800 0 1.0 2.0 3.0 4.0 3 1
## 6 6 4500 3500 0 8.0 2.0 10.0 1.0 5 2
## 7 7 3000 2600 1 6.0 1.0 3.0 4.0 2 2
## 8 8 3000 1500 0 2.0 8.0 6.0 2.0 5 3
## 9 9 850 425 1 3.0 3.0 25.0 25.0 1 3
## 10 10 2200 1200 1 6.0 3.0 1.0 4.0 1 3
## 11 11 4000 1000 1 3.0 5.0 3.0 2.0 1 4
## 12 12 7000 3700 1 10.0 4.0 10.0 1.0 4 4
## 13 13 4500 1500 1 6.0 4.0 4.0 9.0 3 4
## 14 14 9000 2250 1 8.0 4.0 5.0 3.0 2 5
## 15 15 7500 3000 1 10.0 3.0 10.0 3.0 4 5
## 16 16 3000 1000 1 20.0 5.0 15.0 10.0 1 5
## 17 17 2500 700 1 10.0 5.0 15.0 5.0 3 5
attach(d5.5) #用变量名绑定对应数据
library(MASS) #加载MASS程序包
ld=lda(G~x1+x2+x3+x4+x5+x6+x7+x8 ,data=d5.5) #进行Fisher判别
ld #输出Fisher判别结果
## Call:
## lda(G ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8, data = d5.5)
##
## Prior probabilities of groups:
## 1 2 3 4 5
## 0.2941176 0.1176471 0.1764706 0.1764706 0.2352941
##
## Group means:
## x1 x2 x3 x4 x5 x6 x7 x8
## 1 1900.000 2020.000 0.0000000 0.440000 0.860000 1.920000 1.56000 2.800000
## 2 3750.000 3050.000 0.5000000 7.000000 1.500000 6.500000 2.50000 3.500000
## 3 2016.667 1041.667 0.6666667 3.666667 4.666667 10.666667 10.33333 2.333333
## 4 5166.667 2066.667 1.0000000 6.333333 4.333333 5.666667 4.00000 2.666667
## 5 5500.000 1737.500 1.0000000 12.000000 4.250000 11.250000 5.25000 2.500000
##
## Coefficients of linear discriminants:
## LD1 LD2 LD3 LD4
## x1 -0.0007164378 0.0003596294 -0.0003375472 -0.0003323650
## x2 0.0009924142 -0.0006760787 0.0011282987 0.0006358171
## x3 -6.3503345862 -1.0084830487 2.3753777780 -0.3490206289
## x4 -0.1873884661 0.2314166950 -0.0738995333 0.0972101567
## x5 -0.9351221766 -0.6699135788 0.0731328954 0.0477025080
## x6 -0.0670499156 0.0718332863 -0.0458326807 0.0499189047
## x7 0.0297970134 -0.1451930438 -0.0102609391 0.0649354912
## x8 -0.6139592787 0.0719928551 0.2066526811 0.0782916174
##
## Proportion of trace:
## LD1 LD2 LD3 LD4
## 0.9494 0.0388 0.0080 0.0038
Z=predict(ld) #进行回判
newG=Z$class #列出新分类
cbind(G,newG,Z$post,Z$x) #按列合并相应部分
## G newG 1 2 3 4 5
## 1 1 1 1.000000e+00 1.227119e-12 1.383805e-27 1.358376e-46 1.535010e-59
## 2 1 1 1.000000e+00 3.837253e-12 8.817109e-26 1.143012e-43 1.171875e-54
## 3 1 1 1.000000e+00 2.976602e-09 1.117004e-21 1.435602e-37 6.106781e-47
## 4 1 1 9.999999e-01 6.715992e-08 4.448919e-19 1.997998e-34 2.212452e-43
## 5 1 1 9.999957e-01 4.279122e-06 2.602445e-15 1.396851e-30 3.808027e-41
## 6 2 2 1.057763e-05 9.999894e-01 1.168112e-08 4.568299e-17 2.441305e-22
## 7 2 2 6.418528e-12 9.954750e-01 4.524864e-03 9.403021e-08 8.403882e-14
## 8 3 3 7.161886e-24 3.835197e-08 9.997522e-01 2.478053e-04 8.956075e-10
## 9 3 3 4.693048e-22 4.194240e-07 9.999907e-01 8.900903e-06 1.064378e-10
## 10 3 3 2.094998e-18 2.100510e-03 9.950136e-01 2.885848e-03 4.609018e-08
## 11 4 4 3.988083e-33 3.046613e-11 1.820397e-02 9.810438e-01 7.522734e-04
## 12 4 5 2.164814e-45 2.527348e-15 6.778993e-08 4.269475e-01 5.730525e-01
## 13 4 4 3.090695e-36 4.249146e-12 2.155792e-03 9.926006e-01 5.243655e-03
## 14 5 5 4.435382e-51 1.318130e-20 7.483593e-11 1.220436e-02 9.877956e-01
## 15 5 5 4.596634e-46 3.042433e-16 1.645365e-09 3.390928e-02 9.660907e-01
## 16 5 5 1.494594e-51 3.071736e-20 5.024523e-11 2.798769e-04 9.997201e-01
## 17 5 5 4.084106e-47 1.191121e-17 1.036977e-07 7.307777e-02 9.269221e-01
## LD1 LD2 LD3 LD4
## 1 9.7461911 -0.2833461 1.52078495 0.581075247
## 2 9.0626224 0.4782772 -0.55790023 -0.623838300
## 3 7.9156691 1.0629396 -1.06820932 -0.728984940
## 4 7.3578545 0.8415167 -1.28118512 -0.506091792
## 5 6.8862388 -0.7603403 0.30031431 0.264499674
## 6 3.4078937 2.0081635 0.18976532 1.202704290
## 7 0.9495605 0.1213966 1.80074911 0.148372279
## 8 -1.6906835 -3.0196311 -0.50524691 -0.002168409
## 9 -1.2120796 -2.7561272 -0.99006108 1.667399028
## 10 -0.9888544 -0.7752833 0.52244718 -0.558604067
## 11 -3.9786981 -1.5927593 -0.01397653 -1.510283187
## 12 -6.1661146 0.8145515 1.73874897 0.361468201
## 13 -4.5541425 -1.1873526 0.38221384 -0.453578607
## 14 -7.0404517 1.2577527 -0.62924318 -1.695923788
## 15 -6.2243072 1.8471487 0.68651155 -0.167617827
## 16 -7.0140871 1.6821502 -1.56480112 1.593165301
## 17 -6.4566114 0.2609431 -0.53091175 0.428406897
tab=table(G, newG)
tab #新旧分类列表比较
## newG
## G 1 2 3 4 5
## 1 5 0 0 0 0
## 2 0 2 0 0 0
## 3 0 0 3 0 0
## 4 0 0 0 2 1
## 5 0 0 0 0 4
##对17个原始数据的回代判别中,有1个样本被错误分类,误判率为1/15 ≈ 0.067 或 6.7%。
sum(diag(prop.table(tab))) #计算判别正确率
## [1] 0.9411765
newdata<-data.frame(x1=2500,x2=1500,x3=0,x4=3,x5=2,x6=3,x7=4,x8=1)
predict(ld, newdata= newdata) #对newdata进行判别
## $class
## [1] 1
## Levels: 1 2 3 4 5
##
## $posterior
## 1 2 3 4 5
## 1 0.9999991 9.398528e-07 3.874671e-16 3.405835e-31 5.009639e-40
##
## $x
## LD1 LD2 LD3 LD4
## 1 6.807461 0.257595 -1.558805 -0.358043
##判别结果表明: 1 具有最高的后验概率,即该客户的信用度为 1 ,且 LD1 是最重要的线性判别式。