已知样本分类,给出判别函数和判别规则,以确定新样本所属类别。
Fisher判别:判别规则为确定性,分类时只考虑判别函数的大小。
Bayes判别:判别规则为统计性,分类时需要考虑概率性质。
setwd("C:/Users/lenovo/Desktop")
d6.1<- read.table("d6.1.txt",header = T)
attach(d6.1)
plot(x1,x2)
text(x1,x2,G,adj=-0.5)#标识点所属类别
library(MASS)
(ld=lda(G~x1+x2))#线性判别模型
## Call:
## lda(G ~ x1 + x2)
##
## Prior probabilities of groups:
## 1 2
## 0.5 0.5
##
## Group means:
## x1 x2
## 1 0.92 2.10
## 2 -0.38 8.85
##
## Coefficients of linear discriminants:
## LD1
## x1 -0.1035305
## x2 0.2247957
Z=predict(ld)
newG=Z$class
cbind(G,Z$x,newG)
## G LD1 newG
## 1 1 -0.28674901 1
## 2 1 -0.39852439 1
## 3 1 -1.29157053 1
## 4 1 -1.15846657 1
## 5 1 -1.95857603 1
## 6 1 0.94809469 2
## 7 1 -2.50987753 1
## 8 1 -0.47066104 1
## 9 1 -1.06586461 1
## 10 1 -0.06760842 1
## 11 2 0.17022402 2
## 12 2 0.49351760 2
## 13 2 2.03780185 2
## 14 2 0.38346871 2
## 15 2 -1.24038077 1
## 16 2 0.24005867 2
## 17 2 1.42347182 2
## 18 2 2.01119984 2
## 19 2 1.40540244 2
## 20 2 1.33503926 2
(tab=table(G,newG))#混淆矩阵
## newG
## G 1 2
## 1 9 1
## 2 1 9
sum(diag(prop.table(tab))) #判对率,prop.table计算频率
## [1] 0.9
setwd("C:/Users/lenovo/Desktop")
d6.2<- read.table("d6.2.txt",header = T)
attach(d6.2)#绑定数据
## The following object is masked from d6.1:
##
## G
plot(Q,C)
text(Q,C,G,adj=-0.8)
plot(Q,P)
text(Q,P,G,adj=-0.8)
plot(C,P)
text(C,P,G,adj=-0.8)
library(mvstats)
discrim.dist(cbind(Q,C,P),as.factor(G))#马氏距离判别模型
## G D.1 D.2 newG
## 1 1 3.8507782 67.45440177 1
## 2 1 2.8476998 44.62599853 1
## 3 1 1.5897084 39.84664742 1
## 4 1 2.8111763 45.15294171 1
## 5 1 0.9377041 43.92182886 1
## 6 1 2.8165526 71.64252058 1
## 7 1 6.0174948 89.58755724 1
## 8 1 2.9464378 45.06306177 1
## 9 1 0.1041511 17.90918643 1
## 10 1 2.8320875 54.82120347 1
## 11 1 2.5160120 54.33005806 1
## 12 1 4.5608228 20.49558450 1
## 13 1 2.1693746 6.30248460 1
## 14 2 15.5694431 4.64954551 2
## 15 2 6.5995759 1.10207828 2
## 16 2 3.1291104 3.39680714 1
## 17 2 15.2816030 3.50621981 2
## 18 2 10.1892742 0.09427008 2
## 19 2 10.1234095 1.49170488 2
## 20 2 6.6015812 3.75937430 2
discrim.dist(cbind(Q,C,P),as.factor(G),c(8.0,7.5,65))#判别一个观测所属类别
## $Dist
## [,1]
## [1,] 0.5537181
## [2,] 21.9250354
##
## $newG
## 1
## newG 1
library(MASS)
(ld=lda(G~Q+C+P))
## Call:
## lda(G ~ Q + C + P)
##
## Prior probabilities of groups:
## 1 2
## 0.65 0.35
##
## Group means:
## Q C P
## 1 7.976923 6.730769 61.53846
## 2 5.957143 3.714286 34.00000
##
## Coefficients of linear discriminants:
## LD1
## Q -0.82211427
## C -0.64614217
## P 0.01495461
W.x=predict(ld)$x
cbind(G,W=W.x,newG=ifelse(W.x<0,1,2))
## G LD1 LD1
## 1 1 -0.1069501 1
## 2 1 -2.4486840 1
## 3 1 -0.3569119 1
## 4 1 -0.9914270 1
## 5 1 -1.7445428 1
## 6 1 -2.5102440 1
## 7 1 0.3574261 2
## 8 1 -2.6388274 1
## 9 1 -1.2304672 1
## 10 1 -1.8499498 1
## 11 1 -1.2578515 1
## 12 1 -0.1244489 1
## 13 1 0.3531596 2
## 14 2 2.9416056 2
## 15 2 1.6046131 2
## 16 2 0.7642167 2
## 17 2 3.0877463 2
## 18 2 2.3162705 2
## 19 2 2.2697429 2
## 20 2 1.5655239 2
setwd("C:/Users/lenovo/Desktop")
d6.3<- read.table("d6.3.txt",header = T)
attach(d6.3)#绑定数据
## The following objects are masked from d6.2:
##
## C, G, P, Q
##
## The following object is masked from d6.1:
##
## G
plot(Q,C);text(Q,C,G,adj=-0.8,cex=0.75)
plot(Q,P);text(Q,P,G,adj=-0.8,cex=0.75)
plot(C,P);text(C,P,G,adj=-0.8,cex=0.75)
D=discrim.dist(cbind(Q,C,P),as.factor(G))#异方差马氏距离判别模型
## G D.1 D.2 D.3 newG
## 1 1 2.523945 9.1098063 67.45440177 1
## 2 1 2.647988 4.0185551 44.62599853 1
## 3 1 2.351227 4.5484097 39.84664742 1
## 4 1 2.834838 3.0750203 45.15294171 1
## 5 1 1.642002 2.6003823 43.92182886 1
## 6 2 36.094079 3.3813060 71.64252058 2
## 7 2 524.483775 3.5490324 89.58755724 2
## 8 2 29.617174 2.7431000 45.06306177 2
## 9 2 55.716748 0.1739398 17.90918643 2
## 10 2 159.307720 2.2098606 54.82120347 2
## 11 2 220.973242 1.7891625 54.33005806 2
## 12 2 51.324207 4.1752738 20.49558450 2
## 13 2 108.302944 2.9783249 6.30248460 2
## 14 3 277.546516 16.4806425 4.64954551 3
## 15 3 225.150834 6.5934388 1.10207828 3
## 16 3 166.717076 3.1779814 3.39680714 2
## 17 3 420.877625 13.4992376 3.50621981 3
## 18 3 273.863395 9.7542839 0.09427008 3
## 19 3 272.231240 9.2729988 1.49170488 3
## 20 3 154.746222 6.7145131 3.75937430 3
cbind(G,D=t(D[[1]]),t(D[[2]]))#显示结果
## Warning in cbind(G, D = t(D[[1]]), t(D[[2]])): number of rows of result is
## not a multiple of vector length (arg 1)
## G
## [1,] 1 1 1 1 1 1 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 2.523945 2.647988 2.351227
##
## [1,] 2.834838 1.642002 36.09408 524.4838 29.61717 55.71675 159.3077
##
## [1,] 220.9732 51.32421 108.3029 277.5465 225.1508 166.7171 420.8776
##
## [1,] 273.8634 272.2312 154.7462
D=discrim.dist(cbind(Q,C,P),as.factor(G),var.equal=T)#同方差马氏距离判别模型
## G D.1 D.2 D.3 newG
## 1 1 1.4453619 7.537584 5.60401471 1
## 2 1 2.2664280 3.815572 8.03923348 1
## 3 1 0.2180557 3.847188 3.33168261 1
## 4 1 4.2187373 4.998226 6.77363946 1
## 5 1 0.1387577 2.772724 5.25312540 1
## 6 2 1.2052321 4.163269 7.96097682 1
## 7 2 11.5119044 5.038159 7.06315239 2
## 8 2 3.9971279 2.160277 8.04431766 2
## 9 2 1.9114280 0.175065 3.36826837 2
## 10 2 8.5080434 1.622153 7.96253787 2
## 11 2 8.8262908 1.651934 6.98824286 2
## 12 2 7.6853199 5.428475 6.73903800 2
## 13 2 1.7100195 1.621708 1.19880008 3
## 14 3 5.8449048 7.527269 1.54883666 3
## 15 3 3.4594845 2.790551 0.23425362 3
## 16 3 2.5944645 1.334076 0.58787430 3
## 17 3 7.0518669 5.329790 0.38554940 3
## 18 3 4.6223349 3.888282 0.01685556 3
## 19 3 5.6101515 3.985109 0.54399031 3
## 20 3 3.9324003 3.291692 0.79815067 3
cbind(G,D=t(D[[1]]),t(D[[2]]))#显示结果
## Warning in cbind(G, D = t(D[[1]]), t(D[[2]])): number of rows of result is
## not a multiple of vector length (arg 1)
## G
## [1,] 1 1 1 1 1 1 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 1.445362 2.266428 0.2180557
##
## [1,] 4.218737 0.1387577 1.205232 11.5119 3.997128 1.911428 8.508043
##
## [1,] 8.826291 7.68532 1.710019 5.844905 3.459485 2.594465 7.051867
##
## [1,] 4.622335 5.610151 3.9324
(ld=lda(G~Q+C+P))
## Call:
## lda(G ~ Q + C + P)
##
## Prior probabilities of groups:
## 1 2 3
## 0.25 0.40 0.35
##
## Group means:
## Q C P
## 1 8.400000 5.900000 48.200
## 2 7.712500 7.250000 69.875
## 3 5.957143 3.714286 34.000
##
## Coefficients of linear discriminants:
## LD1 LD2
## Q -0.81173396 0.88406311
## C -0.63090549 0.20134565
## P 0.01579385 -0.08775636
##
## Proportion of trace:
## LD1 LD2
## 0.7403 0.2597
Z=predict(ld)
newG=Z$class
cbind(G,Z$x,newG)
## G LD1 LD2 newG
## 1 1 -0.1409984 2.582951755 1
## 2 1 -2.3918356 0.825366275 1
## 3 1 -0.3704452 1.641514840 1
## 4 1 -0.9714835 0.548448277 1
## 5 1 -1.7134891 1.246681993 1
## 6 2 -2.4593598 1.361571174 1
## 7 2 0.3789617 -2.200431689 2
## 8 2 -2.5581070 -0.467096091 2
## 9 2 -1.1900285 -0.412972027 2
## 10 2 -1.7638874 -2.382302324 2
## 11 2 -1.1869165 -2.485574940 2
## 12 2 -0.1123680 -0.598883922 2
## 13 2 0.3399132 0.232863397 3
## 14 3 2.8456561 0.936722573 3
## 15 3 1.5592346 0.025668216 3
## 16 3 0.7457802 -0.209168159 3
## 17 3 3.0062824 -0.358989534 3
## 18 3 2.2511708 0.008852067 3
## 19 3 2.2108260 -0.331206768 3
## 20 3 1.5210939 0.035984885 3
(tab=table(G,newG))
## newG
## G 1 2 3
## 1 5 0 0
## 2 1 6 1
## 3 0 0 7
diag(prop.table(tab,1))
## 1 2 3
## 1.00 0.75 1.00
sum(diag(prop.table(tab)))
## [1] 0.9
plot(Z$x)
text(Z$x[,1],Z$x[,2],G,adj=-0.8,cex=0.75)
(qd=qda(G~Q+C+P))
## Call:
## qda(G ~ Q + C + P)
##
## Prior probabilities of groups:
## 1 2 3
## 0.25 0.40 0.35
##
## Group means:
## Q C P
## 1 8.400000 5.900000 48.200
## 2 7.712500 7.250000 69.875
## 3 5.957143 3.714286 34.000
Z=predict(qd)
newG=Z$class
cbind(G,newG)
## G newG
## [1,] 1 1
## [2,] 1 1
## [3,] 1 1
## [4,] 1 1
## [5,] 1 1
## [6,] 2 2
## [7,] 2 2
## [8,] 2 2
## [9,] 2 2
## [10,] 2 2
## [11,] 2 2
## [12,] 2 2
## [13,] 2 3
## [14,] 3 3
## [15,] 3 3
## [16,] 3 3
## [17,] 3 3
## [18,] 3 3
## [19,] 3 3
## [20,] 3 3
(tab=table(G,newG))
## newG
## G 1 2 3
## 1 5 0 0
## 2 0 7 1
## 3 0 0 7
sum(diag(prop.table(tab)))
## [1] 0.95
(ld1=lda(G~Q+C+P,prior=c(1,1,1)/3))#先验概率相等的Bayes判别模型
## Call:
## lda(G ~ Q + C + P, prior = c(1, 1, 1)/3)
##
## Prior probabilities of groups:
## 1 2 3
## 0.3333333 0.3333333 0.3333333
##
## Group means:
## Q C P
## 1 8.400000 5.900000 48.200
## 2 7.712500 7.250000 69.875
## 3 5.957143 3.714286 34.000
##
## Coefficients of linear discriminants:
## LD1 LD2
## Q -0.92307369 0.76708185
## C -0.65222524 0.11482179
## P 0.02743244 -0.08484154
##
## Proportion of trace:
## LD1 LD2
## 0.7259 0.2741
(ld2=lda(G~Q+C+P,prior=c(5,8,7)/20))#先验概率不相等的Bayes判别模型
## Call:
## lda(G ~ Q + C + P, prior = c(5, 8, 7)/20)
##
## Prior probabilities of groups:
## 1 2 3
## 0.25 0.40 0.35
##
## Group means:
## Q C P
## 1 8.400000 5.900000 48.200
## 2 7.712500 7.250000 69.875
## 3 5.957143 3.714286 34.000
##
## Coefficients of linear discriminants:
## LD1 LD2
## Q -0.81173396 0.88406311
## C -0.63090549 0.20134565
## P 0.01579385 -0.08775636
##
## Proportion of trace:
## LD1 LD2
## 0.7403 0.2597
Z1=predict(ld1)#预测所属类别
cbind(G,Z1$x,Z1$class)#显示结果
## G LD1 LD2
## 1 1 -0.40839476 2.37788417 1
## 2 1 -2.40289378 0.33402788 1
## 3 1 -0.50937350 1.41416605 1
## 4 1 -0.95822294 0.25030363 1
## 5 1 -1.78725129 0.84259965 1
## 6 2 -2.54179395 0.85631321 1
## 7 2 0.74904277 -2.29238928 2
## 8 2 -2.39414277 -0.96905637 2
## 9 2 -1.04571568 -0.73175336 2
## 10 2 -1.34999059 -2.76029783 2
## 11 2 -0.76437825 -2.78517532 2
## 12 2 0.04714807 -0.77130282 2
## 13 2 0.38367004 0.11363494 3
## 14 3 2.77223326 1.14752615 3
## 15 3 1.61976965 0.07201330 3
## 16 3 0.84520688 -0.26990599 3
## 17 3 3.10535893 -0.11489136 3
## 18 3 2.30769941 0.14824404 3
## 19 3 2.31337377 -0.19415269 3
## 20 3 1.58058919 0.07711606 3
Z2=predict(ld2)#预测所属类别
cbind(G,Z2$x,Z2$class)#显示结果
## G LD1 LD2
## 1 1 -0.1409984 2.582951755 1
## 2 1 -2.3918356 0.825366275 1
## 3 1 -0.3704452 1.641514840 1
## 4 1 -0.9714835 0.548448277 1
## 5 1 -1.7134891 1.246681993 1
## 6 2 -2.4593598 1.361571174 1
## 7 2 0.3789617 -2.200431689 2
## 8 2 -2.5581070 -0.467096091 2
## 9 2 -1.1900285 -0.412972027 2
## 10 2 -1.7638874 -2.382302324 2
## 11 2 -1.1869165 -2.485574940 2
## 12 2 -0.1123680 -0.598883922 2
## 13 2 0.3399132 0.232863397 3
## 14 3 2.8456561 0.936722573 3
## 15 3 1.5592346 0.025668216 3
## 16 3 0.7457802 -0.209168159 3
## 17 3 3.0062824 -0.358989534 3
## 18 3 2.2511708 0.008852067 3
## 19 3 2.2108260 -0.331206768 3
## 20 3 1.5210939 0.035984885 3
table(G,Z1$class)#混淆矩阵
##
## G 1 2 3
## 1 5 0 0
## 2 1 6 1
## 3 0 0 7
table(G,Z2$class)#混淆矩阵
##
## G 1 2 3
## 1 5 0 0
## 2 1 6 1
## 3 0 0 7
Z1$post#后验概率
## 1 2 3
## 1 9.825868e-01 0.0055569542 1.185623e-02
## 2 7.942318e-01 0.2056795353 8.863083e-05
## 3 9.372086e-01 0.0431043895 1.968700e-02
## 4 6.537085e-01 0.3371446000 9.146940e-03
## 5 9.051591e-01 0.0943611123 4.797895e-04
## 6 9.278323e-01 0.0721271201 4.054001e-05
## 7 3.336193e-03 0.8632226466 1.334412e-01
## 8 1.774694e-01 0.8224629811 6.760323e-05
## 9 1.846964e-01 0.8105204167 4.783224e-03
## 10 2.846667e-03 0.9969782280 1.751051e-04
## 11 2.196368e-03 0.9968539111 9.497206e-04
## 12 1.112250e-01 0.7798203058 1.089547e-01
## 13 2.917605e-01 0.3250330167 3.832065e-01
## 14 7.593656e-04 0.0001977776 9.990429e-01
## 15 1.210206e-02 0.0227472382 9.651507e-01
## 16 7.940855e-02 0.2426608653 6.779306e-01
## 17 7.945077e-05 0.0003790029 9.995415e-01
## 18 1.392102e-03 0.0028100452 9.957979e-01
## 19 9.960190e-04 0.0042952808 9.947087e-01
## 20 1.377258e-02 0.0252493823 9.609780e-01