Fisher线性判别函数

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --

## √ ggplot2 3.2.1     √ purrr   0.3.3
## √ tibble  2.1.3     √ dplyr   0.8.3
## √ tidyr   1.0.0     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.4.0

## Warning: package 'ggplot2' was built under R version 3.6.1

## Warning: package 'tibble' was built under R version 3.6.1

## Warning: package 'tidyr' was built under R version 3.6.1

## Warning: package 'purrr' was built under R version 3.6.1

## Warning: package 'dplyr' was built under R version 3.6.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(foreign)
library(MASS)
## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select
# Load the SPSS file for example 3-5. read.spss() is called with its
# defaults, so `mydata` holds the raw object it returns; that object is then
# converted to a data frame (one row per region: city plus x1-x8).
mydata <- read.spss(file = "./例3-5.sav")
data <- mydata %>% as.data.frame()
# Print the full table for inspection.
data
##        city      x1     x2      x3     x4     x5     x6     x7     x8
## 1  北京      8070.4 2643.0 12128.0 2511.0 5077.9 4054.7 2629.8 1140.6
## 2  天津      8679.6 2114.0  6187.3 1663.8 3991.9 2643.6 2172.2  892.2
## 3  河北      4991.6 1614.4  4483.2 1351.1 2664.1 1991.3 1549.9  460.4
## 4  山西      3862.8 1603.0  3633.8  951.6 2401.0 2439.0 1651.6  450.1
## 5  内蒙古    6445.8 2543.3  4006.1 1565.1 3045.2 2598.9 1840.2  699.9
## 6  辽宁      6901.6 2321.3  4632.8 1558.2 3447.0 3018.5 2313.6  802.8
## 7  吉林      4975.7 1819.0  3612.0 1107.1 2691.0 2367.5 2059.2  534.9
## 8  黑龙江    5019.3 1804.4  3352.4 1018.9 2462.9 2011.5 2007.5  468.3
## 9  上海     10014.8 1834.8 13216.0 1868.2 4447.5 4533.5 2839.9 1102.1
## 10 江苏      7389.2 1809.5  6140.6 1616.2 3952.4 3163.9 1624.5  736.6
## 11 浙江      8467.3 1903.9  7385.4 1420.7 5100.9 3452.3 1691.9  645.3
## 12 安徽      6381.7 1491.0  3931.2 1118.4 2748.4 2233.3 1269.3  432.9
## 13 福建      8299.6 1443.5  6530.5 1393.4 3205.7 2461.5 1178.5  492.8
## 14 江西      5667.5 1472.2  3915.9 1028.6 2310.6 1963.9  887.4  449.6
## 15 山东      5929.4 1977.7  4473.1 1576.5 3002.5 2399.3 1610.0  526.9
## 16 河南      5067.7 1746.6  3753.4 1430.2 1993.8 2078.8 1524.5  492.8
## 17 湖北      6294.3 1557.4  4176.7 1163.8 2391.9 2228.4 1792.0  435.6
## 18 湖南      6407.7 1666.4  3918.7 1384.1 2837.1 3406.1 1362.6  437.4
## 19 广东      9421.6 1583.4  6410.4 1721.9 4198.1 3103.4 1304.5  870.1
## 20 广西      5937.2  886.3  3784.3 1032.8 2259.8 2003.0 1065.9  299.3
## 21 海南      7419.7  859.6  3527.7  954.0 2582.3 1931.3 1399.8  341.0
## 22 重庆      6883.9 1939.2  3801.1 1466.0 2573.9 2232.4 1700.0  434.4
## 23 四川      7118.4 1767.5  3756.5 1311.1 2697.6 2008.4 1423.4  577.1
## 24 贵州      6010.3 1525.4  3793.1 1270.2 2684.4 2493.5 1050.1  374.6
## 25 云南      5528.2 1195.5  3814.4 1135.1 2791.2 2217.0 1526.7  414.3
## 26 西藏      8727.8 1812.5  3614.5  983.0 2198.4  922.5  585.3  596.5
## 27 陕西      5422.0 1542.2  3681.5 1367.7 2455.7 2474.0 2016.7  409.0
## 28 甘肃      5777.3 1776.9  3752.6 1329.1 2517.9 2322.1 1583.4  479.9
## 29 青海      5975.7 1963.5  3809.4 1322.1 3064.3 2352.9 1750.4  614.9
## 30 宁夏      4889.2 1726.7  3770.5 1245.1 3896.5 2415.7 1874.0  546.6
## 31 新疆      6179.4 1966.1  3543.9 1543.8 3074.1 2404.9 1934.8  581.5

两个类别数据,两个待判样本

# Add a running row id (`number`) as the first column so regions can be
# picked by position. row_number() replaces the literal 1:31 so the ids stay
# correct if the number of rows in `data` ever changes.
df <- data %>%
  mutate(number = row_number()) %>%
  dplyr::select(number, everything()) # dplyr:: prefix: MASS masks select()

# Samples to be classified: rows 19 (Guangdong) and 26 (Tibet).
donnot_konow <- df %>%
  filter(number %in% c(19, 26))
donnot_konow
##   number     city     x1     x2     x3     x4     x5     x6     x7    x8
## 1     19 广东     9421.6 1583.4 6410.4 1721.9 4198.1 3103.4 1304.5 870.1
## 2     26 西藏     8727.8 1812.5 3614.5  983.0 2198.4  922.5  585.3 596.5
# Class 1 training samples: rows 1 (Beijing) and 9 (Shanghai).
first <- dplyr::filter(df, is.element(number, c(1, 9)))
first
##   number     city      x1     x2    x3     x4     x5     x6     x7     x8
## 1      1 北京      8070.4 2643.0 12128 2511.0 5077.9 4054.7 2629.8 1140.6
## 2      9 上海     10014.8 1834.8 13216 1868.2 4447.5 4533.5 2839.9 1102.1
# Class 2 training samples: every region except the two class-1 rows (1, 9)
# and the two rows held out for classification (19, 26).
second <- dplyr::filter(df, !is.element(number, c(1, 9, 19, 26)))
second
##    number     city     x1     x2     x3     x4     x5     x6     x7    x8
## 1       2 天津     8679.6 2114.0 6187.3 1663.8 3991.9 2643.6 2172.2 892.2
## 2       3 河北     4991.6 1614.4 4483.2 1351.1 2664.1 1991.3 1549.9 460.4
## 3       4 山西     3862.8 1603.0 3633.8  951.6 2401.0 2439.0 1651.6 450.1
## 4       5 内蒙古   6445.8 2543.3 4006.1 1565.1 3045.2 2598.9 1840.2 699.9
## 5       6 辽宁     6901.6 2321.3 4632.8 1558.2 3447.0 3018.5 2313.6 802.8
## 6       7 吉林     4975.7 1819.0 3612.0 1107.1 2691.0 2367.5 2059.2 534.9
## 7       8 黑龙江   5019.3 1804.4 3352.4 1018.9 2462.9 2011.5 2007.5 468.3
## 8      10 江苏     7389.2 1809.5 6140.6 1616.2 3952.4 3163.9 1624.5 736.6
## 9      11 浙江     8467.3 1903.9 7385.4 1420.7 5100.9 3452.3 1691.9 645.3
## 10     12 安徽     6381.7 1491.0 3931.2 1118.4 2748.4 2233.3 1269.3 432.9
## 11     13 福建     8299.6 1443.5 6530.5 1393.4 3205.7 2461.5 1178.5 492.8
## 12     14 江西     5667.5 1472.2 3915.9 1028.6 2310.6 1963.9  887.4 449.6
## 13     15 山东     5929.4 1977.7 4473.1 1576.5 3002.5 2399.3 1610.0 526.9
## 14     16 河南     5067.7 1746.6 3753.4 1430.2 1993.8 2078.8 1524.5 492.8
## 15     17 湖北     6294.3 1557.4 4176.7 1163.8 2391.9 2228.4 1792.0 435.6
## 16     18 湖南     6407.7 1666.4 3918.7 1384.1 2837.1 3406.1 1362.6 437.4
## 17     20 广西     5937.2  886.3 3784.3 1032.8 2259.8 2003.0 1065.9 299.3
## 18     21 海南     7419.7  859.6 3527.7  954.0 2582.3 1931.3 1399.8 341.0
## 19     22 重庆     6883.9 1939.2 3801.1 1466.0 2573.9 2232.4 1700.0 434.4
## 20     23 四川     7118.4 1767.5 3756.5 1311.1 2697.6 2008.4 1423.4 577.1
## 21     24 贵州     6010.3 1525.4 3793.1 1270.2 2684.4 2493.5 1050.1 374.6
## 22     25 云南     5528.2 1195.5 3814.4 1135.1 2791.2 2217.0 1526.7 414.3
## 23     27 陕西     5422.0 1542.2 3681.5 1367.7 2455.7 2474.0 2016.7 409.0
## 24     28 甘肃     5777.3 1776.9 3752.6 1329.1 2517.9 2322.1 1583.4 479.9
## 25     29 青海     5975.7 1963.5 3809.4 1322.1 3064.3 2352.9 1750.4 614.9
## 26     30 宁夏     4889.2 1726.7 3770.5 1245.1 3896.5 2415.7 1874.0 546.6
## 27     31 新疆     6179.4 1966.1 3543.9 1543.8 3074.1 2404.9 1934.8 581.5

下面建立费希尔判别函数,并将广东与西藏归类

# Mean vector of class 1 (Beijing, Shanghai) over the eight indicators,
# which sit in columns 3 to 10 (x1-x8) of `first`.
firstbar <- colMeans(first[3:10])
firstbar
##       x1       x2       x3       x4       x5       x6       x7       x8 
##  9042.60  2238.90 12672.00  2189.60  4762.70  4294.10  2734.85  1121.35
# Mean vector of class 2 (all remaining training regions) over the same
# eight indicators (columns 3 to 10).
secondbar <- colMeans(second[3:10])

# Sample covariance matrix of the class-1 indicators. `first` holds only two
# rows here, so this estimate rests on a single degree of freedom.
sigmafirst <- cov(first[3:10])
sigmafirst
##           x1         x2        x3         x4         x5         x6
## x1 1890345.7 -785732.04 1057753.6 -624930.16 -612874.88  465489.36
## x2 -785732.0  326593.62 -439660.8  259755.48  254744.64 -193483.08
## x3 1057753.6 -439660.80  591872.0 -349683.20 -342937.60  260467.20
## x4 -624930.2  259755.48 -349683.2  206595.92  202610.56 -153886.32
## x5 -612874.9  254744.64 -342937.6  202610.56  198702.08 -150917.76
## x6  465489.4 -193483.08  260467.2 -153886.32 -150917.76  114624.72
## x7  204259.2  -84901.41  114294.4  -67526.14  -66223.52   50297.94
## x8  -37429.7   15557.85  -20944.0   12373.90   12135.20   -9216.90
##            x7         x8
## x1 204259.220 -37429.700
## x2 -84901.410  15557.850
## x3 114294.400 -20944.000
## x4 -67526.140  12373.900
## x5 -66223.520  12135.200
## x6  50297.940  -9216.900
## x7  22071.005  -4044.425
## x8  -4044.425    741.125
# Sample covariance matrix of the class-2 indicators (columns x1-x8).
sigmasecond <- cov(second[3:10])
sigmasecond
##             x1        x2         x3        x4        x5        x6
## x1 1350072.167  57475.64  882691.85 118834.79 465123.66 203424.08
## x2   57475.636 135087.78   78701.83  55496.81  98760.67  66431.37
## x3  882691.851  78701.83 1073285.66 108681.78 542771.72 256115.87
## x4  118834.795  55496.81  108681.78  45070.30  72511.11  47391.94
## x5  465123.658  98760.67  542771.72  72511.11 448471.48 198259.05
## x6  203424.077  66431.37  256115.87  47391.94 198259.05 171720.11
## x7   -6737.919  82062.52   18761.12  29208.02  77587.63  39397.23
## x8   74307.576  40168.09   78143.49  20927.28  63622.06  30159.47
##            x7       x8
## x1  -6737.919 74307.58
## x2  82062.522 40168.09
## x3  18761.123 78143.49
## x4  29208.024 20927.28
## x5  77587.628 63622.06
## x6  39397.233 30159.47
## x7 123891.400 29826.20
## x8  29826.198 19381.16
# Pooled within-group sum of squares and cross-products:
#   (n1 - 1) * S1 + (n2 - 1) * S2
# The weights are derived from the group sizes instead of being hard-coded
# as 1 and 26, so they stay right if the class membership changes.
(nrow(first) - 1) * sigmafirst + (nrow(second) - 1) * sigmasecond
##             x1        x2         x3        x4       x5        x6
## x1 36992222.02  708634.5 24007741.7 2464774.5 11480340 5754515.4
## x2   708634.48 3838875.9  1606586.8 1702672.5  2822522 1533732.5
## x3 24007741.72 1606586.8 28497299.2 2476043.2 13769127 6919479.7
## x4  2464774.51 1702672.5  2476043.2 1378423.6  2087899 1078304.2
## x5 11480340.22 2822522.1 13769127.1 2087899.5 11858961 5003817.5
## x6  5754515.36 1533732.5  6919479.7 1078304.2  5003818 4579347.5
## x7    29073.33 2048724.2   602083.6  691882.5  1951055 1074626.0
## x8  1894567.28 1059928.3  2010786.7  556483.3  1666309  774929.3
##            x7        x8
## x1   29073.33 1894567.3
## x2 2048724.17 1059928.3
## x3  602083.59 2010786.7
## x4  691882.49  556483.3
## x5 1951054.81 1666308.9
## x6 1074626.01  774929.3
## x7 3243247.41  771436.7
## x8  771436.73  504651.3
# Pooled covariance matrix:
#   S = [(n1 - 1) * S1 + (n2 - 1) * S2] / (n1 + n2 - 2)
# Group sizes are read from the data instead of the literals 1, 26 and 27.
n_first <- nrow(first)
n_second <- nrow(second)
sigma <- (1 / (n_first + n_second - 2)) *
  ((n_first - 1) * sigmafirst + (n_second - 1) * sigmasecond)
# Called with a single matrix argument, solve() returns its inverse
# (solve(A, b) would instead solve the linear system A x = b).
isigma <- solve(sigma)
isigma
##               x1            x2            x3            x4            x5
## x1  1.870797e-06  2.727665e-06 -1.075786e-06 -2.325264e-06  2.062894e-07
## x2  2.727665e-06  3.719901e-05  3.098620e-06 -2.803182e-05  3.759616e-07
## x3 -1.075786e-06  3.098620e-06  3.915187e-06 -1.373249e-06 -2.351212e-06
## x4 -2.325264e-06 -2.803182e-05 -1.373249e-06  5.795242e-05 -1.368254e-06
## x5  2.062894e-07  3.759616e-07 -2.351212e-06 -1.368254e-06  7.982084e-06
## x6 -2.541857e-07 -3.262367e-06 -1.595938e-06  5.147847e-08 -3.191265e-06
## x7  6.113240e-07 -2.444204e-06  1.681203e-06  5.010538e-06 -5.769528e-07
## x8 -7.127095e-06 -6.230097e-05 -8.910986e-06  5.951788e-06 -1.126056e-05
##               x6            x7            x8
## x1 -2.541857e-07  6.113240e-07 -7.127095e-06
## x2 -3.262367e-06 -2.444204e-06 -6.230097e-05
## x3 -1.595938e-06  1.681203e-06 -8.910986e-06
## x4  5.147847e-08  5.010538e-06  5.951788e-06
## x5 -3.191265e-06 -5.769528e-07 -1.126056e-05
## x6  1.213177e-05 -1.858449e-06  8.857471e-06
## x7 -1.858449e-06  1.657116e-05 -2.995808e-05
## x8  8.857471e-06 -2.995808e-05  3.094290e-04
# Coefficient (row) vector of Fisher's linear discriminant function:
#   (mean1 - mean2)' %*% S^{-1}
# A sample's score is its indicator vector multiplied by these coefficients.
t(firstbar - secondbar) %*% isigma
##                x1          x2         x3         x4          x5
## [1,] -0.008060849 -0.01671964 0.01950031 0.02471871 -0.01888066
##               x6         x7        x8
## [1,] 0.004303822 0.01479747 0.0257263
                             #即得分 =-0.008060849*x1 -0.01671964*x2+0.01950031*x3 +0.02471871*x4 -0.01888066*x5 +0.004303822*x6 +0.01479747*x7+ 0.0257263*x8
# Cut-off (midpoint) between the two classes on the discriminant axis:
#   (mean1 - mean2)' %*% S^{-1} %*% (mean1 + mean2) / 2
# The recorded run gives 109.1717.
(t(firstbar - secondbar) %*% isigma %*% (firstbar + secondbar)) / 2
##          [,1]
## [1,] 109.1717
# Squared Mahalanobis distance between the two class means:
#   D^2 = (mean1 - mean2)' %*% S^{-1} %*% (mean1 - mean2)
# i.e. the quadratic form uses the inverse pooled covariance `isigma`.
# NOTE(review): the original note also calls this the largest eigenvalue;
# that depends on how the between-group matrix is scaled - confirm.
t(firstbar - secondbar) %*% isigma %*% (firstbar - secondbar)
##          [,1]
## [1,] 159.2218
# Class-1 indicator values (columns 3 to 10) as a matrix, so they can be
# used in the matrix products that follow.
firstrd <- as.matrix(first[3:10])
firstrd
##           x1     x2    x3     x4     x5     x6     x7     x8
## [1,]  8070.4 2643.0 12128 2511.0 5077.9 4054.7 2629.8 1140.6
## [2,] 10014.8 1834.8 13216 1868.2 4447.5 4533.5 2839.9 1102.1
# Matrix versions of the indicator columns (3 to 10) for class 2 and for the
# two samples still to be classified.
secondrd <- as.matrix(second[3:10])
donnot_konowrd <- as.matrix(donnot_konow[3:10])

# Discriminant scores of the class-1 samples (Beijing, Shanghai). In the
# recorded run they are 179.1586 and 198.4066, both above the midpoint
# 109.1717.
first_judge <- t(firstbar - secondbar) %*% isigma %*% t(firstrd)
first_judge
##          [,1]     [,2]
## [1,] 179.1586 198.4066
# Discriminant scores of the 27 class-2 training samples, one column per
# region in the row order of `second`. Every score in the recorded output
# lies below the midpoint 109.1717.
second_judge <- t(firstbar - secondbar) %*% isigma %*% t(secondrd)
second_judge
##          [,1]     [,2]   [,3]     [,4]     [,5]     [,6]     [,7]     [,8]
## [1,] 47.57496 46.64175 37.627 21.25174 37.21164 30.89321 23.83951 51.85801
##          [,9]   [,10]    [,11]    [,12]    [,13]    [,14]    [,15]
## [1,] 39.23608 15.5736 50.93808 21.01187 36.34956 45.03187 35.59145
##         [,16]    [,17]   [,18]    [,19]    [,20]    [,21]    [,22]
## [1,] 23.62458 26.07357 7.23423 19.78959 12.35001 16.63631 27.98147
##         [,23]   [,24]    [,25]  [,26]    [,27]
## [1,] 40.75376 27.9821 19.95792 14.643 20.48342
# Discriminant scores of the two held-out samples. The recorded run gives
# 40.92931 for Guangdong and -19.40572 for Tibet, both below the midpoint
# 109.1717 (the original author was surprised Guangdong scores this low).
donnot_konow_judge <- t(firstbar - secondbar) %*% isigma %*% t(donnot_konowrd)
donnot_konow_judge
##          [,1]      [,2]
## [1,] 40.92931 -19.40572

用包来实现类别的判断与分类

# Rebuild the numbered table for the package-based analysis. row_number()
# replaces the literal 1:31 so the ids always match the number of rows in
# `data`; `number` is moved to the first column.
df <- data %>%
  mutate(number = row_number()) %>%
  dplyr::select(number, everything()) # dplyr:: prefix: MASS masks select()

# Samples to be classified: rows 19 (Guangdong) and 26 (Tibet).
donnot_konow <- df %>%
  filter(number %in% c(19, 26))
donnot_konow
##   number     city     x1     x2     x3     x4     x5     x6     x7    x8
## 1     19 广东     9421.6 1583.4 6410.4 1721.9 4198.1 3103.4 1304.5 870.1
## 2     26 西藏     8727.8 1812.5 3614.5  983.0 2198.4  922.5  585.3 596.5
# Class 1 (Beijing, Shanghai) for the package-based fit. Every row is
# labelled genre = 1, the known class label used to train the model.
# `genre` is moved to the front, followed by `number` and the other columns.
first <- dplyr::filter(df, number %in% c(1, 9))
first <- dplyr::mutate(first, genre = 1)
first <- dplyr::select(first, genre, number, dplyr::everything())
first
##   genre number     city      x1     x2    x3     x4     x5     x6     x7
## 1     1      1 北京      8070.4 2643.0 12128 2511.0 5077.9 4054.7 2629.8
## 2     1      9 上海     10014.8 1834.8 13216 1868.2 4447.5 4533.5 2839.9
##       x8
## 1 1140.6
## 2 1102.1
# Class 2 for the package-based fit: all regions except Beijing, Shanghai
# (class 1) and Guangdong, Tibet (held out). Every row is labelled
# genre = 2, and `genre` is moved to the front of the table.
second <- dplyr::filter(df, !(number %in% c(1, 9, 19, 26)))
second <- dplyr::mutate(second, genre = 2)
second <- dplyr::select(second, genre, number, dplyr::everything())
second
##    genre number     city     x1     x2     x3     x4     x5     x6     x7
## 1      2      2 天津     8679.6 2114.0 6187.3 1663.8 3991.9 2643.6 2172.2
## 2      2      3 河北     4991.6 1614.4 4483.2 1351.1 2664.1 1991.3 1549.9
## 3      2      4 山西     3862.8 1603.0 3633.8  951.6 2401.0 2439.0 1651.6
## 4      2      5 内蒙古   6445.8 2543.3 4006.1 1565.1 3045.2 2598.9 1840.2
## 5      2      6 辽宁     6901.6 2321.3 4632.8 1558.2 3447.0 3018.5 2313.6
## 6      2      7 吉林     4975.7 1819.0 3612.0 1107.1 2691.0 2367.5 2059.2
## 7      2      8 黑龙江   5019.3 1804.4 3352.4 1018.9 2462.9 2011.5 2007.5
## 8      2     10 江苏     7389.2 1809.5 6140.6 1616.2 3952.4 3163.9 1624.5
## 9      2     11 浙江     8467.3 1903.9 7385.4 1420.7 5100.9 3452.3 1691.9
## 10     2     12 安徽     6381.7 1491.0 3931.2 1118.4 2748.4 2233.3 1269.3
## 11     2     13 福建     8299.6 1443.5 6530.5 1393.4 3205.7 2461.5 1178.5
## 12     2     14 江西     5667.5 1472.2 3915.9 1028.6 2310.6 1963.9  887.4
## 13     2     15 山东     5929.4 1977.7 4473.1 1576.5 3002.5 2399.3 1610.0
## 14     2     16 河南     5067.7 1746.6 3753.4 1430.2 1993.8 2078.8 1524.5
## 15     2     17 湖北     6294.3 1557.4 4176.7 1163.8 2391.9 2228.4 1792.0
## 16     2     18 湖南     6407.7 1666.4 3918.7 1384.1 2837.1 3406.1 1362.6
## 17     2     20 广西     5937.2  886.3 3784.3 1032.8 2259.8 2003.0 1065.9
## 18     2     21 海南     7419.7  859.6 3527.7  954.0 2582.3 1931.3 1399.8
## 19     2     22 重庆     6883.9 1939.2 3801.1 1466.0 2573.9 2232.4 1700.0
## 20     2     23 四川     7118.4 1767.5 3756.5 1311.1 2697.6 2008.4 1423.4
## 21     2     24 贵州     6010.3 1525.4 3793.1 1270.2 2684.4 2493.5 1050.1
## 22     2     25 云南     5528.2 1195.5 3814.4 1135.1 2791.2 2217.0 1526.7
## 23     2     27 陕西     5422.0 1542.2 3681.5 1367.7 2455.7 2474.0 2016.7
## 24     2     28 甘肃     5777.3 1776.9 3752.6 1329.1 2517.9 2322.1 1583.4
## 25     2     29 青海     5975.7 1963.5 3809.4 1322.1 3064.3 2352.9 1750.4
## 26     2     30 宁夏     4889.2 1726.7 3770.5 1245.1 3896.5 2415.7 1874.0
## 27     2     31 新疆     6179.4 1966.1 3543.9 1543.8 3074.1 2404.9 1934.8
##       x8
## 1  892.2
## 2  460.4
## 3  450.1
## 4  699.9
## 5  802.8
## 6  534.9
## 7  468.3
## 8  736.6
## 9  645.3
## 10 432.9
## 11 492.8
## 12 449.6
## 13 526.9
## 14 492.8
## 15 435.6
## 16 437.4
## 17 299.3
## 18 341.0
## 19 434.4
## 20 577.1
## 21 374.6
## 22 414.3
## 23 409.0
## 24 479.9
## 25 614.9
## 26 546.6
## 27 581.5
# Training set: the class-1 rows stacked on top of the class-2 rows, i.e.
# every region except the two held-out samples (Guangdong, Tibet).
train_sample <- rbind(first, second)
train_sample
##    genre number     city      x1     x2      x3     x4     x5     x6
## 1      1      1 北京      8070.4 2643.0 12128.0 2511.0 5077.9 4054.7
## 2      1      9 上海     10014.8 1834.8 13216.0 1868.2 4447.5 4533.5
## 3      2      2 天津      8679.6 2114.0  6187.3 1663.8 3991.9 2643.6
## 4      2      3 河北      4991.6 1614.4  4483.2 1351.1 2664.1 1991.3
## 5      2      4 山西      3862.8 1603.0  3633.8  951.6 2401.0 2439.0
## 6      2      5 内蒙古    6445.8 2543.3  4006.1 1565.1 3045.2 2598.9
## 7      2      6 辽宁      6901.6 2321.3  4632.8 1558.2 3447.0 3018.5
## 8      2      7 吉林      4975.7 1819.0  3612.0 1107.1 2691.0 2367.5
## 9      2      8 黑龙江    5019.3 1804.4  3352.4 1018.9 2462.9 2011.5
## 10     2     10 江苏      7389.2 1809.5  6140.6 1616.2 3952.4 3163.9
## 11     2     11 浙江      8467.3 1903.9  7385.4 1420.7 5100.9 3452.3
## 12     2     12 安徽      6381.7 1491.0  3931.2 1118.4 2748.4 2233.3
## 13     2     13 福建      8299.6 1443.5  6530.5 1393.4 3205.7 2461.5
## 14     2     14 江西      5667.5 1472.2  3915.9 1028.6 2310.6 1963.9
## 15     2     15 山东      5929.4 1977.7  4473.1 1576.5 3002.5 2399.3
## 16     2     16 河南      5067.7 1746.6  3753.4 1430.2 1993.8 2078.8
## 17     2     17 湖北      6294.3 1557.4  4176.7 1163.8 2391.9 2228.4
## 18     2     18 湖南      6407.7 1666.4  3918.7 1384.1 2837.1 3406.1
## 19     2     20 广西      5937.2  886.3  3784.3 1032.8 2259.8 2003.0
## 20     2     21 海南      7419.7  859.6  3527.7  954.0 2582.3 1931.3
## 21     2     22 重庆      6883.9 1939.2  3801.1 1466.0 2573.9 2232.4
## 22     2     23 四川      7118.4 1767.5  3756.5 1311.1 2697.6 2008.4
## 23     2     24 贵州      6010.3 1525.4  3793.1 1270.2 2684.4 2493.5
## 24     2     25 云南      5528.2 1195.5  3814.4 1135.1 2791.2 2217.0
## 25     2     27 陕西      5422.0 1542.2  3681.5 1367.7 2455.7 2474.0
## 26     2     28 甘肃      5777.3 1776.9  3752.6 1329.1 2517.9 2322.1
## 27     2     29 青海      5975.7 1963.5  3809.4 1322.1 3064.3 2352.9
## 28     2     30 宁夏      4889.2 1726.7  3770.5 1245.1 3896.5 2415.7
## 29     2     31 新疆      6179.4 1966.1  3543.9 1543.8 3074.1 2404.9
##        x7     x8
## 1  2629.8 1140.6
## 2  2839.9 1102.1
## 3  2172.2  892.2
## 4  1549.9  460.4
## 5  1651.6  450.1
## 6  1840.2  699.9
## 7  2313.6  802.8
## 8  2059.2  534.9
## 9  2007.5  468.3
## 10 1624.5  736.6
## 11 1691.9  645.3
## 12 1269.3  432.9
## 13 1178.5  492.8
## 14  887.4  449.6
## 15 1610.0  526.9
## 16 1524.5  492.8
## 17 1792.0  435.6
## 18 1362.6  437.4
## 19 1065.9  299.3
## 20 1399.8  341.0
## 21 1700.0  434.4
## 22 1423.4  577.1
## 23 1050.1  374.6
## 24 1526.7  414.3
## 25 2016.7  409.0
## 26 1583.4  479.9
## 27 1750.4  614.9
## 28 1874.0  546.6
## 29 1934.8  581.5
# Fit a linear discriminant analysis with lda() from MASS: `genre` is the
# class label and x1-x8 are the eight predictors.
params <- lda(
  genre ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8,
  data = train_sample
)
# Printing shows the priors, the group means and the LD1 coefficients. In
# the recorded output LD1 equals the hand-computed coefficient vector
# divided by about -12.62, so both describe the same direction.
params
## Call:
## lda(genre ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8, data = train_sample)
## 
## Prior probabilities of groups:
##          1          2 
## 0.06896552 0.93103448 
## 
## Group means:
##         x1       x2        x3       x4       x5     x6       x7        x8
## 1 9042.600 2238.900 12672.000 2189.600 4762.700 4294.1 2734.850 1121.3500
## 2 6219.337 1705.056  4265.485 1308.322 2920.152 2419.0 1624.448  519.6704
## 
## Coefficients of linear discriminants:
##              LD1
## x1  0.0006388214
## x2  0.0013250299
## x3 -0.0015453976
## x4 -0.0019589553
## x5  0.0014962902
## x6 -0.0003410774
## x7 -0.0011726981
## x8 -0.0020388069
# Re-classify the training samples with the fitted model as a sanity check;
# in the recorded run every predicted class matches the true one.
self_predict <- predict(params)
# Columns: true class, LD1 score, predicted class. predict()$class is a
# factor and cbind() would silently use its internal integer codes, so it is
# converted through its labels to show the real class values.
cbind(
  train_sample$genre,
  self_predict$x,
  as.numeric(as.character(self_predict$class))
)
##               LD1  
## 1  1 -10.98538469 1
## 2  1 -12.51078456 1
## 3  2  -0.55739557 2
## 4  2  -0.48343902 2
## 5  2   0.23097937 2
## 6  2   1.52871713 2
## 7  2   0.26389623 2
## 8  2   0.76463092 2
## 9  2   1.32363596 2
## 10 2  -0.89682690 2
## 11 2   0.10345994 2
## 12 2   1.97870916 2
## 13 2  -0.82392271 2
## 14 2   1.54772684 2
## 15 2   0.33221626 2
## 16 2  -0.35585638 2
## 17 2   0.39229619 2
## 18 2   1.34066979 2
## 19 2   1.14658710 2
## 20 2   2.63960296 2
## 21 2   1.64459206 2
## 22 2   2.23417796 2
## 23 2   1.89448886 2
## 24 2   0.99538672 2
## 25 2  -0.01681659 2
## 26 2   0.99533667 2
## 27 2   1.63125178 2
## 28 2   2.05245883 2
## 29 2   1.58960569 2
# Classify the two held-out samples (Guangdong, Tibet) with the fitted model.
P <- predict(params, newdata = donnot_konow)
# LD1 score next to the predicted class. The class factor is converted
# through its labels, because cbind() would otherwise show the factor's
# internal integer codes. In the recorded run both samples fall in class 2.
cbind(P$x, as.numeric(as.character(P$class)))
##           LD1  
## 1 -0.03072896 2
## 2  4.75081654 2