判别分析

　　已知样本分类，给出判别函数和判别规则，以确定新样本所属类别。

　　Fisher判别：判别规则为确定性，分类时只考虑判别函数的大小。

　　Bayes判别：判别规则为统计性，分类时需要考虑概率性质。

1.Fisher判别分析

1.1线性判别分析

setwd("C:/Users/lenovo/Desktop")
d6.1<- read.table("d6.1.txt",header = T)

attach(d6.1)
plot(x1,x2)
text(x1,x2,G,adj=-0.5)#标识点所属类别

library(MASS)
(ld=lda(G~x1+x2))#线性判别模型

## Call:
## lda(G ~ x1 + x2)
## 
## Prior probabilities of groups:
##   1   2 
## 0.5 0.5 
## 
## Group means:
##      x1   x2
## 1  0.92 2.10
## 2 -0.38 8.85
## 
## Coefficients of linear discriminants:
##           LD1
## x1 -0.1035305
## x2  0.2247957

Z=predict(ld)
newG=Z$class
cbind(G,Z$x,newG)

##    G         LD1 newG
## 1  1 -0.28674901    1
## 2  1 -0.39852439    1
## 3  1 -1.29157053    1
## 4  1 -1.15846657    1
## 5  1 -1.95857603    1
## 6  1  0.94809469    2
## 7  1 -2.50987753    1
## 8  1 -0.47066104    1
## 9  1 -1.06586461    1
## 10 1 -0.06760842    1
## 11 2  0.17022402    2
## 12 2  0.49351760    2
## 13 2  2.03780185    2
## 14 2  0.38346871    2
## 15 2 -1.24038077    1
## 16 2  0.24005867    2
## 17 2  1.42347182    2
## 18 2  2.01119984    2
## 19 2  1.40540244    2
## 20 2  1.33503926    2

(tab=table(G,newG))#混淆矩阵

##    newG
## G   1 2
##   1 9 1
##   2 1 9

sum(diag(prop.table(tab))) #判对率，prop.table计算频率

## [1] 0.9

1.2距离判别分析

setwd("C:/Users/lenovo/Desktop")
d6.2<- read.table("d6.2.txt",header = T)

attach(d6.2)#绑定数据

## The following object is masked from d6.1:
## 
##     G

plot(Q,C)
text(Q,C,G,adj=-0.8)

plot(Q,P)
text(Q,P,G,adj=-0.8)

plot(C,P)
text(C,P,G,adj=-0.8)

library(mvstats)
discrim.dist(cbind(Q,C,P),as.factor(G))#马氏距离判别模型

##    G        D.1         D.2 newG
## 1  1  3.8507782 67.45440177    1
## 2  1  2.8476998 44.62599853    1
## 3  1  1.5897084 39.84664742    1
## 4  1  2.8111763 45.15294171    1
## 5  1  0.9377041 43.92182886    1
## 6  1  2.8165526 71.64252058    1
## 7  1  6.0174948 89.58755724    1
## 8  1  2.9464378 45.06306177    1
## 9  1  0.1041511 17.90918643    1
## 10 1  2.8320875 54.82120347    1
## 11 1  2.5160120 54.33005806    1
## 12 1  4.5608228 20.49558450    1
## 13 1  2.1693746  6.30248460    1
## 14 2 15.5694431  4.64954551    2
## 15 2  6.5995759  1.10207828    2
## 16 2  3.1291104  3.39680714    1
## 17 2 15.2816030  3.50621981    2
## 18 2 10.1892742  0.09427008    2
## 19 2 10.1234095  1.49170488    2
## 20 2  6.6015812  3.75937430    2

discrim.dist(cbind(Q,C,P),as.factor(G),c(8.0,7.5,65))#判别一个观测所属类别

## $Dist
##            [,1]
## [1,]  0.5537181
## [2,] 21.9250354
## 
## $newG
##      1
## newG 1

library(MASS)
(ld=lda(G~Q+C+P))

## Call:
## lda(G ~ Q + C + P)
## 
## Prior probabilities of groups:
##    1    2 
## 0.65 0.35 
## 
## Group means:
##          Q        C        P
## 1 7.976923 6.730769 61.53846
## 2 5.957143 3.714286 34.00000
## 
## Coefficients of linear discriminants:
##           LD1
## Q -0.82211427
## C -0.64614217
## P  0.01495461

W.x=predict(ld)$x
cbind(G,W=W.x,newG=ifelse(W.x<0,1,2))

##    G        LD1 LD1
## 1  1 -0.1069501   1
## 2  1 -2.4486840   1
## 3  1 -0.3569119   1
## 4  1 -0.9914270   1
## 5  1 -1.7445428   1
## 6  1 -2.5102440   1
## 7  1  0.3574261   2
## 8  1 -2.6388274   1
## 9  1 -1.2304672   1
## 10 1 -1.8499498   1
## 11 1 -1.2578515   1
## 12 1 -0.1244489   1
## 13 1  0.3531596   2
## 14 2  2.9416056   2
## 15 2  1.6046131   2
## 16 2  0.7642167   2
## 17 2  3.0877463   2
## 18 2  2.3162705   2
## 19 2  2.2697429   2
## 20 2  1.5655239   2

1.3多总体距离判别分析

setwd("C:/Users/lenovo/Desktop")
d6.3<- read.table("d6.3.txt",header = T)

attach(d6.3)#绑定数据

## The following objects are masked from d6.2:
## 
##     C, G, P, Q
## 
## The following object is masked from d6.1:
## 
##     G

plot(Q,C);text(Q,C,G,adj=-0.8,cex=0.75)

plot(Q,P);text(Q,P,G,adj=-0.8,cex=0.75)

plot(C,P);text(C,P,G,adj=-0.8,cex=0.75)

D=discrim.dist(cbind(Q,C,P),as.factor(G))#异方差马氏距离判别模型

##    G        D.1        D.2         D.3 newG
## 1  1   2.523945  9.1098063 67.45440177    1
## 2  1   2.647988  4.0185551 44.62599853    1
## 3  1   2.351227  4.5484097 39.84664742    1
## 4  1   2.834838  3.0750203 45.15294171    1
## 5  1   1.642002  2.6003823 43.92182886    1
## 6  2  36.094079  3.3813060 71.64252058    2
## 7  2 524.483775  3.5490324 89.58755724    2
## 8  2  29.617174  2.7431000 45.06306177    2
## 9  2  55.716748  0.1739398 17.90918643    2
## 10 2 159.307720  2.2098606 54.82120347    2
## 11 2 220.973242  1.7891625 54.33005806    2
## 12 2  51.324207  4.1752738 20.49558450    2
## 13 2 108.302944  2.9783249  6.30248460    2
## 14 3 277.546516 16.4806425  4.64954551    3
## 15 3 225.150834  6.5934388  1.10207828    3
## 16 3 166.717076  3.1779814  3.39680714    2
## 17 3 420.877625 13.4992376  3.50621981    3
## 18 3 273.863395  9.7542839  0.09427008    3
## 19 3 272.231240  9.2729988  1.49170488    3
## 20 3 154.746222  6.7145131  3.75937430    3

cbind(G,D=t(D[[1]]),t(D[[2]]))#显示结果

## Warning in cbind(G, D = t(D[[1]]), t(D[[2]])): number of rows of result is
## not a multiple of vector length (arg 1)

##      G                                                                   
## [1,] 1 1 1 1 1 1 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 2.523945 2.647988 2.351227
##                                                                    
## [1,] 2.834838 1.642002 36.09408 524.4838 29.61717 55.71675 159.3077
##                                                                    
## [1,] 220.9732 51.32421 108.3029 277.5465 225.1508 166.7171 420.8776
##                                
## [1,] 273.8634 272.2312 154.7462

D=discrim.dist(cbind(Q,C,P),as.factor(G),var.equal=T)#同方差马氏距离判别模型

##    G        D.1      D.2        D.3 newG
## 1  1  1.4453619 7.537584 5.60401471    1
## 2  1  2.2664280 3.815572 8.03923348    1
## 3  1  0.2180557 3.847188 3.33168261    1
## 4  1  4.2187373 4.998226 6.77363946    1
## 5  1  0.1387577 2.772724 5.25312540    1
## 6  2  1.2052321 4.163269 7.96097682    1
## 7  2 11.5119044 5.038159 7.06315239    2
## 8  2  3.9971279 2.160277 8.04431766    2
## 9  2  1.9114280 0.175065 3.36826837    2
## 10 2  8.5080434 1.622153 7.96253787    2
## 11 2  8.8262908 1.651934 6.98824286    2
## 12 2  7.6853199 5.428475 6.73903800    2
## 13 2  1.7100195 1.621708 1.19880008    3
## 14 3  5.8449048 7.527269 1.54883666    3
## 15 3  3.4594845 2.790551 0.23425362    3
## 16 3  2.5944645 1.334076 0.58787430    3
## 17 3  7.0518669 5.329790 0.38554940    3
## 18 3  4.6223349 3.888282 0.01685556    3
## 19 3  5.6101515 3.985109 0.54399031    3
## 20 3  3.9324003 3.291692 0.79815067    3

cbind(G,D=t(D[[1]]),t(D[[2]]))#显示结果

## Warning in cbind(G, D = t(D[[1]]), t(D[[2]])): number of rows of result is
## not a multiple of vector length (arg 1)

##      G                                                                    
## [1,] 1 1 1 1 1 1 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 1.445362 2.266428 0.2180557
##                                                                    
## [1,] 4.218737 0.1387577 1.205232 11.5119 3.997128 1.911428 8.508043
##                                                                   
## [1,] 8.826291 7.68532 1.710019 5.844905 3.459485 2.594465 7.051867
##                              
## [1,] 4.622335 5.610151 3.9324

(ld=lda(G~Q+C+P))

## Call:
## lda(G ~ Q + C + P)
## 
## Prior probabilities of groups:
##    1    2    3 
## 0.25 0.40 0.35 
## 
## Group means:
##          Q        C      P
## 1 8.400000 5.900000 48.200
## 2 7.712500 7.250000 69.875
## 3 5.957143 3.714286 34.000
## 
## Coefficients of linear discriminants:
##           LD1         LD2
## Q -0.81173396  0.88406311
## C -0.63090549  0.20134565
## P  0.01579385 -0.08775636
## 
## Proportion of trace:
##    LD1    LD2 
## 0.7403 0.2597

Z=predict(ld)
newG=Z$class
cbind(G,Z$x,newG)

##    G        LD1          LD2 newG
## 1  1 -0.1409984  2.582951755    1
## 2  1 -2.3918356  0.825366275    1
## 3  1 -0.3704452  1.641514840    1
## 4  1 -0.9714835  0.548448277    1
## 5  1 -1.7134891  1.246681993    1
## 6  2 -2.4593598  1.361571174    1
## 7  2  0.3789617 -2.200431689    2
## 8  2 -2.5581070 -0.467096091    2
## 9  2 -1.1900285 -0.412972027    2
## 10 2 -1.7638874 -2.382302324    2
## 11 2 -1.1869165 -2.485574940    2
## 12 2 -0.1123680 -0.598883922    2
## 13 2  0.3399132  0.232863397    3
## 14 3  2.8456561  0.936722573    3
## 15 3  1.5592346  0.025668216    3
## 16 3  0.7457802 -0.209168159    3
## 17 3  3.0062824 -0.358989534    3
## 18 3  2.2511708  0.008852067    3
## 19 3  2.2108260 -0.331206768    3
## 20 3  1.5210939  0.035984885    3

(tab=table(G,newG))

##    newG
## G   1 2 3
##   1 5 0 0
##   2 1 6 1
##   3 0 0 7

diag(prop.table(tab,1))

##    1    2    3 
## 1.00 0.75 1.00

sum(diag(prop.table(tab)))

## [1] 0.9

plot(Z$x)
text(Z$x[,1],Z$x[,2],G,adj=-0.8,cex=0.75)

(qd=qda(G~Q+C+P))

## Call:
## qda(G ~ Q + C + P)
## 
## Prior probabilities of groups:
##    1    2    3 
## 0.25 0.40 0.35 
## 
## Group means:
##          Q        C      P
## 1 8.400000 5.900000 48.200
## 2 7.712500 7.250000 69.875
## 3 5.957143 3.714286 34.000

Z=predict(qd)
newG=Z$class
cbind(G,newG)

##       G newG
##  [1,] 1    1
##  [2,] 1    1
##  [3,] 1    1
##  [4,] 1    1
##  [5,] 1    1
##  [6,] 2    2
##  [7,] 2    2
##  [8,] 2    2
##  [9,] 2    2
## [10,] 2    2
## [11,] 2    2
## [12,] 2    2
## [13,] 2    3
## [14,] 3    3
## [15,] 3    3
## [16,] 3    3
## [17,] 3    3
## [18,] 3    3
## [19,] 3    3
## [20,] 3    3

(tab=table(G,newG))

##    newG
## G   1 2 3
##   1 5 0 0
##   2 0 7 1
##   3 0 0 7

sum(diag(prop.table(tab)))

## [1] 0.95

2.正态总体的Bayes判别分析

(ld1=lda(G~Q+C+P,prior=c(1,1,1)/3))#先验概率相等的Bayes判别模型

## Call:
## lda(G ~ Q + C + P, prior = c(1, 1, 1)/3)
## 
## Prior probabilities of groups:
##         1         2         3 
## 0.3333333 0.3333333 0.3333333 
## 
## Group means:
##          Q        C      P
## 1 8.400000 5.900000 48.200
## 2 7.712500 7.250000 69.875
## 3 5.957143 3.714286 34.000
## 
## Coefficients of linear discriminants:
##           LD1         LD2
## Q -0.92307369  0.76708185
## C -0.65222524  0.11482179
## P  0.02743244 -0.08484154
## 
## Proportion of trace:
##    LD1    LD2 
## 0.7259 0.2741

(ld2=lda(G~Q+C+P,prior=c(5,8,7)/20))#先验概率不相等的Bayes判别模型

## Call:
## lda(G ~ Q + C + P, prior = c(5, 8, 7)/20)
## 
## Prior probabilities of groups:
##    1    2    3 
## 0.25 0.40 0.35 
## 
## Group means:
##          Q        C      P
## 1 8.400000 5.900000 48.200
## 2 7.712500 7.250000 69.875
## 3 5.957143 3.714286 34.000
## 
## Coefficients of linear discriminants:
##           LD1         LD2
## Q -0.81173396  0.88406311
## C -0.63090549  0.20134565
## P  0.01579385 -0.08775636
## 
## Proportion of trace:
##    LD1    LD2 
## 0.7403 0.2597

Z1=predict(ld1)#预测所属类别        
cbind(G,Z1$x,Z1$class)#显示结果

##    G         LD1         LD2  
## 1  1 -0.40839476  2.37788417 1
## 2  1 -2.40289378  0.33402788 1
## 3  1 -0.50937350  1.41416605 1
## 4  1 -0.95822294  0.25030363 1
## 5  1 -1.78725129  0.84259965 1
## 6  2 -2.54179395  0.85631321 1
## 7  2  0.74904277 -2.29238928 2
## 8  2 -2.39414277 -0.96905637 2
## 9  2 -1.04571568 -0.73175336 2
## 10 2 -1.34999059 -2.76029783 2
## 11 2 -0.76437825 -2.78517532 2
## 12 2  0.04714807 -0.77130282 2
## 13 2  0.38367004  0.11363494 3
## 14 3  2.77223326  1.14752615 3
## 15 3  1.61976965  0.07201330 3
## 16 3  0.84520688 -0.26990599 3
## 17 3  3.10535893 -0.11489136 3
## 18 3  2.30769941  0.14824404 3
## 19 3  2.31337377 -0.19415269 3
## 20 3  1.58058919  0.07711606 3

Z2=predict(ld2)#预测所属类别
cbind(G,Z2$x,Z2$class)#显示结果

##    G        LD1          LD2  
## 1  1 -0.1409984  2.582951755 1
## 2  1 -2.3918356  0.825366275 1
## 3  1 -0.3704452  1.641514840 1
## 4  1 -0.9714835  0.548448277 1
## 5  1 -1.7134891  1.246681993 1
## 6  2 -2.4593598  1.361571174 1
## 7  2  0.3789617 -2.200431689 2
## 8  2 -2.5581070 -0.467096091 2
## 9  2 -1.1900285 -0.412972027 2
## 10 2 -1.7638874 -2.382302324 2
## 11 2 -1.1869165 -2.485574940 2
## 12 2 -0.1123680 -0.598883922 2
## 13 2  0.3399132  0.232863397 3
## 14 3  2.8456561  0.936722573 3
## 15 3  1.5592346  0.025668216 3
## 16 3  0.7457802 -0.209168159 3
## 17 3  3.0062824 -0.358989534 3
## 18 3  2.2511708  0.008852067 3
## 19 3  2.2108260 -0.331206768 3
## 20 3  1.5210939  0.035984885 3

table(G,Z1$class)#混淆矩阵

##    
## G   1 2 3
##   1 5 0 0
##   2 1 6 1
##   3 0 0 7

table(G,Z2$class)#混淆矩阵

##    
## G   1 2 3
##   1 5 0 0
##   2 1 6 1
##   3 0 0 7

Z1$post#后验概率

##               1            2            3
## 1  9.825868e-01 0.0055569542 1.185623e-02
## 2  7.942318e-01 0.2056795353 8.863083e-05
## 3  9.372086e-01 0.0431043895 1.968700e-02
## 4  6.537085e-01 0.3371446000 9.146940e-03
## 5  9.051591e-01 0.0943611123 4.797895e-04
## 6  9.278323e-01 0.0721271201 4.054001e-05
## 7  3.336193e-03 0.8632226466 1.334412e-01
## 8  1.774694e-01 0.8224629811 6.760323e-05
## 9  1.846964e-01 0.8105204167 4.783224e-03
## 10 2.846667e-03 0.9969782280 1.751051e-04
## 11 2.196368e-03 0.9968539111 9.497206e-04
## 12 1.112250e-01 0.7798203058 1.089547e-01
## 13 2.917605e-01 0.3250330167 3.832065e-01
## 14 7.593656e-04 0.0001977776 9.990429e-01
## 15 1.210206e-02 0.0227472382 9.651507e-01
## 16 7.940855e-02 0.2426608653 6.779306e-01
## 17 7.945077e-05 0.0003790029 9.995415e-01
## 18 1.392102e-03 0.0028100452 9.957979e-01
## 19 9.960190e-04 0.0042952808 9.947087e-01
## 20 1.377258e-02 0.0252493823 9.609780e-01

判别分析

tian

Monday, March 16, 2015

1.Fisher判别分析

1.1线性判别分析

1.2距离判别分析

1.3多总体距离判别分析

2.正态总体的Bayes判别分析