# Classification Tree ----
# Load the training data; the first line of the file holds the column names.
fish <- read.table("file:///C:/Users/Asus/Documents/GitHub/classifng_fish/fish.data.txt", header = TRUE)
library(ggplot2)
# Scatter plot of L1 against Weight. Columns are named bare inside aes() so
# ggplot2 looks them up in `fish`; writing fish$Weight inside aes() bypasses
# the `data` argument and yields axis titles such as "fish$Weight".
ggplot(data = fish, aes(x = Weight, y = L1)) +
  geom_point()

library(rpart)
require(rpart.plot)
## Loading required package: rpart.plot
# Tree-growing controls: a node needs at least 10 observations before a split
# is attempted, every leaf keeps at least 3 observations, and no
# cross-validation is run (xval = 0).
# The argument is spelled `minsplit`; the former spelling `minisplit` is not an
# rpart.control() argument, so it was absorbed by `...` and had no effect.
fish.control <- rpart.control(minsplit = 10, minbucket = 3, xval = 0)
# Classification tree for Species grown on the six raw measurements.
fish.treeorig <- rpart(Species ~ Weight + L1 + L2 + L3 + Height + Width,
                       data = fish, method = "class", control = fish.control)
#Let’s now plot the tree:
# Base-graphics rendering: draw the tree skeleton, then add the labels.
plot(fish.treeorig)
text(fish.treeorig)

# rpart.plot rendering of the same tree.
prp(
  fish.treeorig,         # fitted model
  faclen = 0,            # do not abbreviate factor level names
  fallen.leaves = TRUE,  # position the leaf nodes at the bottom of the plot
  shadow.col = "gray",   # gray shadow under the node boxes
  extra = 2              # number of correct classifications / number of observations in that node
)

# Print the complexity-parameter (CP) table of the unpruned tree. The table
# has no cross-validated error column because the tree was grown with xval = 0.
printcp(fish.treeorig)
##
## Classification tree:
## rpart(formula = Species ~ Weight + L1 + L2 + L3 + Height + Width,
## data = fish, method = "class", control = fish.control)
##
## Variables actually used in tree construction:
## [1] Height L1 L3 Weight Width
##
## Root node error: 94/148 = 0.63514
##
## n= 148
##
## CP nsplit rel error
## 1 0.351064 0 1.00000
## 2 0.170213 1 0.64894
## 3 0.127660 2 0.47872
## 4 0.106383 3 0.35106
## 5 0.053191 4 0.24468
## 6 0.031915 5 0.19149
## 7 0.010638 6 0.15957
## 8 0.010000 10 0.11702
# Detailed per-node report of the unpruned tree: class counts, class
# probabilities, primary splits and surrogate splits for every node.
summary(fish.treeorig)
## Call:
## rpart(formula = Species ~ Weight + L1 + L2 + L3 + Height + Width,
## data = fish, method = "class", control = fish.control)
## n= 148
##
## CP nsplit rel error
## 1 0.35106383 0 1.0000000
## 2 0.17021277 1 0.6489362
## 3 0.12765957 2 0.4787234
## 4 0.10638298 3 0.3510638
## 5 0.05319149 4 0.2446809
## 6 0.03191489 5 0.1914894
## 7 0.01063830 6 0.1595745
## 8 0.01000000 10 0.1170213
##
## Variable importance
## Height L3 L2 L1 Weight Width
## 26 16 15 15 15 13
##
## Node number 1: 148 observations, complexity param=0.3510638
## predicted class=perch expected loss=0.6351351 P(node) =1
## class counts: 33 10 54 16 18 12 5
## probabilities: 0.223 0.068 0.365 0.108 0.122 0.081 0.034
## left son=2 (43 obs) right son=3 (105 obs)
## Primary splits:
## Height < 33.9 to the right, improve=29.75863, (0 missing)
## Width < 11.85 to the right, improve=17.98385, (0 missing)
## L3 < 29.7 to the right, improve=13.80398, (0 missing)
## L2 < 28.85 to the right, improve=12.96300, (0 missing)
## L1 < 26.1 to the right, improve=12.56245, (0 missing)
##
## Node number 2: 43 observations, complexity param=0.106383
## predicted class=bream expected loss=0.2325581 P(node) =0.2905405
## class counts: 33 10 0 0 0 0 0
## probabilities: 0.767 0.233 0.000 0.000 0.000 0.000 0.000
## left son=4 (33 obs) right son=5 (10 obs)
## Primary splits:
## L3 < 29.5 to the right, improve=15.348840, (0 missing)
## L2 < 26.15 to the right, improve=13.530660, (0 missing)
## L1 < 23.1 to the right, improve=13.407660, (0 missing)
## Weight < 331.5 to the right, improve=12.015500, (0 missing)
## Width < 14.85 to the right, improve= 1.063123, (0 missing)
## Surrogate splits:
## L1 < 23.1 to the right, agree=0.977, adj=0.9, (0 split)
## L2 < 25.2 to the right, agree=0.977, adj=0.9, (0 split)
## Weight < 221 to the right, agree=0.953, adj=0.8, (0 split)
##
## Node number 3: 105 observations, complexity param=0.1702128
## predicted class=perch expected loss=0.4857143 P(node) =0.7094595
## class counts: 0 0 54 16 18 12 5
## probabilities: 0.000 0.000 0.514 0.152 0.171 0.114 0.048
## left son=6 (77 obs) right son=7 (28 obs)
## Primary splits:
## Height < 20.1 to the right, improve=21.78355, (0 missing)
## Width < 12.45 to the right, improve=20.93000, (0 missing)
## Weight < 25.95 to the right, improve=13.35778, (0 missing)
## L3 < 15.6 to the right, improve=10.68888, (0 missing)
## L1 < 12.3 to the right, improve=10.63876, (0 missing)
## Surrogate splits:
## Width < 12.45 to the right, agree=0.990, adj=0.964, (0 split)
## Weight < 25.95 to the right, agree=0.838, adj=0.393, (0 split)
## L1 < 12.3 to the right, agree=0.819, adj=0.321, (0 split)
## L2 < 13.35 to the right, agree=0.819, adj=0.321, (0 split)
## L3 < 14.25 to the right, agree=0.819, adj=0.321, (0 split)
##
## Node number 4: 33 observations
## predicted class=bream expected loss=0 P(node) =0.222973
## class counts: 33 0 0 0 0 0 0
## probabilities: 1.000 0.000 0.000 0.000 0.000 0.000 0.000
##
## Node number 5: 10 observations
## predicted class=parki expected loss=0 P(node) =0.06756757
## class counts: 0 10 0 0 0 0 0
## probabilities: 0.000 1.000 0.000 0.000 0.000 0.000 0.000
##
## Node number 6: 77 observations, complexity param=0.05319149
## predicted class=perch expected loss=0.2987013 P(node) =0.5202703
## class counts: 0 0 54 0 18 0 5
## probabilities: 0.000 0.000 0.701 0.000 0.234 0.000 0.065
## left son=12 (64 obs) right son=13 (13 obs)
## Primary splits:
## Width < 14.4 to the right, improve=5.777691, (0 missing)
## Height < 25.25 to the left, improve=4.275974, (0 missing)
## L1 < 25.1 to the right, improve=2.872913, (0 missing)
## L2 < 27.15 to the right, improve=2.872913, (0 missing)
## Weight < 548 to the right, improve=2.448383, (0 missing)
## Surrogate splits:
## L1 < 13.35 to the right, agree=0.844, adj=0.077, (0 split)
## L2 < 14.55 to the right, agree=0.844, adj=0.077, (0 split)
##
## Node number 7: 28 observations, complexity param=0.1276596
## predicted class=pike expected loss=0.4285714 P(node) =0.1891892
## class counts: 0 0 0 16 0 12 0
## probabilities: 0.000 0.000 0.000 0.571 0.000 0.429 0.000
## left son=14 (16 obs) right son=15 (12 obs)
## Primary splits:
## Weight < 109.95 to the right, improve=13.714290, (0 missing)
## L1 < 21.9 to the right, improve=13.714290, (0 missing)
## L2 < 23.65 to the right, improve=13.714290, (0 missing)
## L3 < 25.5 to the right, improve=13.714290, (0 missing)
## Height < 16.05 to the left, improve= 4.571429, (0 missing)
## Surrogate splits:
## L1 < 21.9 to the right, agree=1.000, adj=1.000, (0 split)
## L2 < 23.65 to the right, agree=1.000, adj=1.000, (0 split)
## L3 < 25.5 to the right, agree=1.000, adj=1.000, (0 split)
## Height < 16.05 to the left, agree=0.786, adj=0.500, (0 split)
## Width < 9.45 to the right, agree=0.714, adj=0.333, (0 split)
##
## Node number 12: 64 observations, complexity param=0.0106383
## predicted class=perch expected loss=0.21875 P(node) =0.4324324
## class counts: 0 0 50 0 9 0 5
## probabilities: 0.000 0.000 0.781 0.000 0.141 0.000 0.078
## left son=24 (40 obs) right son=25 (24 obs)
## Primary splits:
## Height < 27.55 to the left, improve=3.314583, (0 missing)
## Width < 15.65 to the right, improve=1.557526, (0 missing)
## L1 < 30 to the right, improve=1.174970, (0 missing)
## L2 < 32.25 to the right, improve=1.174970, (0 missing)
## Weight < 548 to the right, improve=1.058472, (0 missing)
## Surrogate splits:
## Width < 17.4 to the left, agree=0.719, adj=0.250, (0 split)
## Weight < 267.5 to the left, agree=0.688, adj=0.167, (0 split)
## L3 < 29.05 to the left, agree=0.656, adj=0.083, (0 split)
##
## Node number 13: 13 observations, complexity param=0.03191489
## predicted class=roach expected loss=0.3076923 P(node) =0.08783784
## class counts: 0 0 4 0 9 0 0
## probabilities: 0.000 0.000 0.308 0.000 0.692 0.000 0.000
## left son=26 (3 obs) right son=27 (10 obs)
## Primary splits:
## Height < 24.8 to the left, improve=3.7384620, (0 missing)
## Weight < 174.5 to the right, improve=1.0051280, (0 missing)
## L1 < 22.5 to the right, improve=1.0051280, (0 missing)
## L2 < 24.5 to the right, improve=1.0051280, (0 missing)
## L3 < 21.1 to the left, improve=0.4273504, (0 missing)
##
## Node number 14: 16 observations
## predicted class=pike expected loss=0 P(node) =0.1081081
## class counts: 0 0 0 16 0 0 0
## probabilities: 0.000 0.000 0.000 1.000 0.000 0.000 0.000
##
## Node number 15: 12 observations
## predicted class=smelt expected loss=0 P(node) =0.08108108
## class counts: 0 0 0 0 0 12 0
## probabilities: 0.000 0.000 0.000 0.000 0.000 1.000 0.000
##
## Node number 24: 40 observations
## predicted class=perch expected loss=0.075 P(node) =0.2702703
## class counts: 0 0 37 0 3 0 0
## probabilities: 0.000 0.000 0.925 0.000 0.075 0.000 0.000
##
## Node number 25: 24 observations, complexity param=0.0106383
## predicted class=perch expected loss=0.4583333 P(node) =0.1621622
## class counts: 0 0 13 0 6 0 5
## probabilities: 0.000 0.000 0.542 0.000 0.250 0.000 0.208
## left son=50 (9 obs) right son=51 (15 obs)
## Primary splits:
## L1 < 29.5 to the right, improve=2.772222, (0 missing)
## L2 < 31.9 to the right, improve=2.772222, (0 missing)
## Width < 16.45 to the right, improve=2.772222, (0 missing)
## Weight < 295 to the right, improve=2.583333, (0 missing)
## L3 < 32.4 to the right, improve=2.216667, (0 missing)
## Surrogate splits:
## L2 < 31.9 to the right, agree=1.000, adj=1.000, (0 split)
## Weight < 410 to the right, agree=0.958, adj=0.889, (0 split)
## L3 < 32.4 to the right, agree=0.958, adj=0.889, (0 split)
## Width < 16.45 to the right, agree=0.833, adj=0.556, (0 split)
## Height < 29.35 to the right, agree=0.667, adj=0.111, (0 split)
##
## Node number 26: 3 observations
## predicted class=perch expected loss=0 P(node) =0.02027027
## class counts: 0 0 3 0 0 0 0
## probabilities: 0.000 0.000 1.000 0.000 0.000 0.000 0.000
##
## Node number 27: 10 observations
## predicted class=roach expected loss=0.1 P(node) =0.06756757
## class counts: 0 0 1 0 9 0 0
## probabilities: 0.000 0.000 0.100 0.000 0.900 0.000 0.000
##
## Node number 50: 9 observations
## predicted class=perch expected loss=0.1111111 P(node) =0.06081081
## class counts: 0 0 8 0 0 0 1
## probabilities: 0.000 0.000 0.889 0.000 0.000 0.000 0.111
##
## Node number 51: 15 observations, complexity param=0.0106383
## predicted class=roach expected loss=0.6 P(node) =0.1013514
## class counts: 0 0 5 0 6 0 4
## probabilities: 0.000 0.000 0.333 0.000 0.400 0.000 0.267
## left son=102 (11 obs) right son=103 (4 obs)
## Primary splits:
## L3 < 29.25 to the left, improve=2.003030, (0 missing)
## Weight < 247.5 to the left, improve=1.866667, (0 missing)
## L1 < 22.85 to the left, improve=1.866667, (0 missing)
## L2 < 25 to the left, improve=1.866667, (0 missing)
## Height < 28.45 to the left, improve=1.088889, (0 missing)
## Surrogate splits:
## L1 < 24.05 to the left, agree=0.933, adj=0.75, (0 split)
## L2 < 26.25 to the left, agree=0.933, adj=0.75, (0 split)
## Weight < 303 to the left, agree=0.867, adj=0.50, (0 split)
##
## Node number 102: 11 observations, complexity param=0.0106383
## predicted class=perch expected loss=0.5454545 P(node) =0.07432432
## class counts: 0 0 5 0 5 0 1
## probabilities: 0.000 0.000 0.455 0.000 0.455 0.000 0.091
## left son=204 (7 obs) right son=205 (4 obs)
## Primary splits:
## Weight < 212.5 to the left, improve=0.4350649, (0 missing)
## L1 < 22.05 to the left, improve=0.4350649, (0 missing)
## L2 < 23.75 to the left, improve=0.4350649, (0 missing)
## L3 < 26.15 to the left, improve=0.4350649, (0 missing)
## Height < 28.5 to the left, improve=0.4350649, (0 missing)
## Surrogate splits:
## L2 < 23.75 to the left, agree=1.000, adj=1.00, (0 split)
## Height < 28.5 to the left, agree=1.000, adj=1.00, (0 split)
## L1 < 21.25 to the left, agree=0.909, adj=0.75, (0 split)
## L3 < 25.4 to the left, agree=0.909, adj=0.75, (0 split)
## Width < 14.95 to the right, agree=0.727, adj=0.25, (0 split)
##
## Node number 103: 4 observations
## predicted class=white expected loss=0.25 P(node) =0.02702703
## class counts: 0 0 0 0 1 0 3
## probabilities: 0.000 0.000 0.000 0.000 0.250 0.000 0.750
##
## Node number 204: 7 observations
## predicted class=roach expected loss=0.4285714 P(node) =0.0472973
## class counts: 0 0 3 0 4 0 0
## probabilities: 0.000 0.000 0.429 0.000 0.571 0.000 0.000
##
## Node number 205: 4 observations
## predicted class=perch expected loss=0.5 P(node) =0.02702703
## class counts: 0 0 2 0 1 0 1
## probabilities: 0.000 0.000 0.500 0.000 0.250 0.000 0.250
# Prune the original tree back to a complexity parameter of 0.02.
fish.prunetree <- prune.rpart(fish.treeorig, cp = 0.02)

# Base-graphics view of the pruned tree.
plot(fish.prunetree)
text(fish.prunetree)

# rpart.plot view of the pruned tree.
prp(
  fish.prunetree,        # fitted model
  faclen = 0,            # do not abbreviate factor level names
  fallen.leaves = TRUE,  # position the leaf nodes at the bottom of the plot
  shadow.col = "gray",   # gray shadow under the node boxes
  extra = 2              # number of correct classifications / number of observations in that node
)

# Derived predictors: pairwise differences between the three length columns.
L21 <- with(fish, L2 - L1)
L32 <- with(fish, L3 - L2)
L31 <- with(fish, L3 - L1)
# Training frame: the original columns followed by the three differences.
newfish <- cbind(fish, L21, L32, L31)

# Grow a tree on every predictor in newfish, choosing splits by information
# gain (split = "information") with the controls in fish.control.
newfish.treenew <- rpart(
  Species ~ .,
  data = newfish,
  method = "class",
  parms = list(split = "information"),
  control = fish.control
)
printcp(newfish.treenew)
##
## Classification tree:
## rpart(formula = Species ~ ., data = newfish, method = "class",
## parms = list(split = "information"), control = fish.control)
##
## Variables actually used in tree construction:
## [1] Height L21 L3 L32 Weight
##
## Root node error: 94/148 = 0.63514
##
## n= 148
##
## CP nsplit rel error
## 1 0.351064 0 1.000000
## 2 0.170213 1 0.648936
## 3 0.127660 2 0.478723
## 4 0.106383 3 0.351064
## 5 0.095745 4 0.244681
## 6 0.053191 5 0.148936
## 7 0.047872 6 0.095745
## 8 0.010000 8 0.000000
# Base-graphics view of the tree grown on the augmented predictors.
plot(newfish.treenew)
text(newfish.treenew)

# rpart.plot view of the same tree.
prp(
  newfish.treenew,       # fitted model
  faclen = 0,            # do not abbreviate factor level names
  fallen.leaves = TRUE,  # position the leaf nodes at the bottom of the plot
  shadow.col = "gray",   # gray shadow under the node boxes
  extra = 2              # number of correct classifications / number of observations in that node
)

# The tree above separates the training data almost too perfectly (relative
# error 0), which hints at overfitting, so it is re-grown with cross-validation.
# Setting xval to the number of rows gives leave-one-out cross-validation;
# nrow(newfish) (148 for this data set) keeps that true if the data change size.
fish.control <- rpart.control(minbucket = 3, minsplit = 10, xval = nrow(newfish))
newfish.treenewcv <- rpart(Species ~ ., data = newfish, method = "class",
                           parms = list(split = "information"), control = fish.control)
# The CP table now also reports the cross-validated error (xerror, xstd).
printcp(newfish.treenewcv)
##
## Classification tree:
## rpart(formula = Species ~ ., data = newfish, method = "class",
## parms = list(split = "information"), control = fish.control)
##
## Variables actually used in tree construction:
## [1] Height L21 L3 L32 Weight
##
## Root node error: 94/148 = 0.63514
##
## n= 148
##
## CP nsplit rel error xerror xstd
## 1 0.351064 0 1.000000 1.000000 0.062302
## 2 0.170213 1 0.648936 0.648936 0.063704
## 3 0.127660 2 0.478723 0.478723 0.059534
## 4 0.106383 3 0.351064 0.351064 0.053870
## 5 0.095745 4 0.244681 0.361702 0.054442
## 6 0.053191 5 0.148936 0.170213 0.040187
## 7 0.047872 6 0.095745 0.180851 0.041267
## 8 0.010000 8 0.000000 0.031915 0.018238
# Load the new fish to classify. The argument is spelled out as `header = TRUE`
# instead of relying on partial matching (`h`) and the reassignable alias `T`.
newfish.test <- read.table("file:///C:/Users/Asus/Documents/GitHub/classifng_fish/fish_test.data.txt", header = TRUE)
# Same derived length differences as were added to the training data.
L31 <- newfish.test$L3 - newfish.test$L1
L32 <- newfish.test$L3 - newfish.test$L2
L21 <- newfish.test$L2 - newfish.test$L1
newfish.test <- cbind(newfish.test, L21, L32, L31)
# Predict with the cross-validated tree; the result is a matrix with one row
# per test fish and one class-probability column per species.
newfish.tpred <- predict(newfish.treenewcv, newfish.test)
newfish.tpred
## bream parki perch pike roach smelt white
## 1 1 0 0 0 0 0 0
## 2 1 0 0 0 0 0 0
## 3 0 0 1 0 0 0 0
## 4 0 0 1 0 0 0 0
## 5 0 0 0 1 0 0 0
## 6 0 0 0 0 0 1 0
## 7 0 0 0 0 0 1 0
## 8 0 1 0 0 0 0 0
## 9 0 0 0 0 1 0 0
## 10 0 0 0 0 1 0 0
## 11 0 0 0 0 0 0 1
# Linear Discriminant Analysis ----
library(MASS)
# Print the full augmented training set (original columns plus L21, L32, L31).
newfish
## Species Weight L1 L2 L3 Height Width L21 L32 L31
## 1 bream 242.0 23.2 25.4 30.0 38.4 13.4 2.2 4.6 6.8
## 2 bream 290.0 24.0 26.3 31.2 40.0 13.8 2.3 4.9 7.2
## 3 bream 363.0 26.3 29.0 33.5 38.0 13.3 2.7 4.5 7.2
## 4 bream 430.0 26.5 29.0 34.0 36.6 15.1 2.5 5.0 7.5
## 5 bream 500.0 26.8 29.7 34.5 41.1 15.3 2.9 4.8 7.7
## 6 bream 390.0 27.6 30.0 35.0 36.2 13.4 2.4 5.0 7.4
## 7 bream 450.0 27.6 30.0 35.1 39.9 13.8 2.4 5.1 7.5
## 8 bream 500.0 28.5 30.7 36.2 39.3 13.7 2.2 5.5 7.7
## 9 bream 475.0 28.4 31.0 36.2 39.4 14.1 2.6 5.2 7.8
## 10 bream 500.0 28.7 31.0 36.2 39.7 13.3 2.3 5.2 7.5
## 11 bream 500.0 29.1 31.5 36.4 37.8 12.0 2.4 4.9 7.3
## 12 bream 500.0 29.5 32.0 37.3 37.3 13.6 2.5 5.3 7.8
## 13 bream 600.0 29.4 32.0 37.2 40.2 13.9 2.6 5.2 7.8
## 14 bream 600.0 29.4 32.0 37.2 41.5 15.0 2.6 5.2 7.8
## 15 bream 700.0 30.4 33.0 38.3 38.8 13.8 2.6 5.3 7.9
## 16 bream 700.0 30.4 33.0 38.5 38.8 13.5 2.6 5.5 8.1
## 17 bream 610.0 30.9 33.5 38.6 40.5 13.3 2.6 5.1 7.7
## 18 bream 650.0 31.0 33.5 38.7 37.4 14.8 2.5 5.2 7.7
## 19 bream 575.0 31.3 34.0 39.5 38.3 14.1 2.7 5.5 8.2
## 20 bream 685.0 31.4 34.0 39.2 40.8 13.7 2.6 5.2 7.8
## 21 bream 620.0 31.5 34.5 39.7 39.1 13.3 3.0 5.2 8.2
## 22 bream 680.0 31.8 35.0 40.6 38.1 15.1 3.2 5.6 8.8
## 23 bream 700.0 31.9 35.0 40.5 40.1 13.8 3.1 5.5 8.6
## 24 bream 725.0 31.8 35.0 40.9 40.0 14.8 3.2 5.9 9.1
## 25 bream 720.0 32.0 35.0 40.6 40.3 15.0 3.0 5.6 8.6
## 26 bream 714.0 32.7 36.0 41.5 39.8 14.1 3.3 5.5 8.8
## 27 bream 850.0 32.8 36.0 41.6 40.6 14.9 3.2 5.6 8.8
## 28 bream 1000.0 33.5 37.0 42.6 44.5 15.5 3.5 5.6 9.1
## 29 bream 920.0 35.0 38.5 44.1 40.9 14.3 3.5 5.6 9.1
## 30 bream 955.0 35.0 38.5 44.0 41.1 14.3 3.5 5.5 9.0
## 31 bream 925.0 36.2 39.5 45.3 41.4 14.9 3.3 5.8 9.1
## 32 bream 975.0 37.4 41.0 45.9 40.6 14.7 3.6 4.9 8.5
## 33 bream 950.0 38.0 41.0 46.5 37.9 13.7 3.0 5.5 8.5
## 34 white 270.0 23.6 26.0 28.7 29.2 14.8 2.4 2.7 5.1
## 35 white 270.0 24.1 26.5 29.3 27.8 14.5 2.4 2.8 5.2
## 36 white 306.0 25.6 28.0 30.8 28.5 15.2 2.4 2.8 5.2
## 37 white 540.0 28.5 31.0 34.0 31.6 19.3 2.5 3.0 5.5
## 38 white 1000.0 37.3 40.0 43.5 28.4 15.0 2.7 3.5 6.2
## 39 roach 40.0 12.9 14.1 16.2 25.6 14.0 1.2 2.1 3.3
## 40 roach 69.0 16.5 18.2 20.3 26.1 13.9 1.7 2.1 3.8
## 41 roach 78.0 17.5 18.8 21.2 26.3 13.7 1.3 2.4 3.7
## 42 roach 87.0 18.2 19.8 22.2 25.3 14.3 1.6 2.4 4.0
## 43 roach 120.0 18.6 20.0 22.2 28.0 16.1 1.4 2.2 3.6
## 44 roach 118.0 19.0 20.5 22.8 28.4 14.7 1.5 2.3 3.8
## 45 roach 110.0 19.1 20.8 23.1 26.7 14.7 1.7 2.3 4.0
## 46 roach 120.0 19.4 21.0 23.7 25.8 13.9 1.6 2.7 4.3
## 47 roach 160.0 20.5 22.5 25.3 27.8 15.1 2.0 2.8 4.8
## 48 roach 140.0 21.0 22.5 25.0 26.2 13.3 1.5 2.5 4.0
## 49 roach 160.0 21.1 22.5 25.0 25.6 15.2 1.4 2.5 3.9
## 50 roach 169.0 22.0 24.0 27.2 27.7 14.1 2.0 3.2 5.2
## 51 roach 161.0 22.0 23.4 26.7 25.9 13.6 1.4 3.3 4.7
## 52 roach 200.0 22.1 23.5 26.8 27.6 15.4 1.4 3.3 4.7
## 53 roach 180.0 23.6 25.2 27.9 25.4 14.0 1.6 2.7 4.3
## 54 roach 290.0 24.0 26.0 29.2 30.4 15.4 2.0 3.2 5.2
## 55 roach 272.0 25.0 27.0 30.6 28.0 15.6 2.0 3.6 5.6
## 56 roach 390.0 29.5 31.7 35.0 27.1 15.3 2.2 3.3 5.5
## 57 parki 55.0 13.5 14.7 16.5 41.5 14.1 1.2 1.8 3.0
## 58 parki 60.0 14.3 15.5 17.4 37.8 13.3 1.2 1.9 3.1
## 59 parki 90.0 16.3 17.7 19.8 37.4 13.5 1.4 2.1 3.5
## 60 parki 120.0 17.5 19.0 21.3 39.4 13.7 1.5 2.3 3.8
## 61 parki 150.0 18.4 20.0 22.4 39.7 14.7 1.6 2.4 4.0
## 62 parki 140.0 19.0 20.7 23.2 36.8 14.2 1.7 2.5 4.2
## 63 parki 170.0 19.0 20.7 23.2 40.5 14.7 1.7 2.5 4.2
## 64 parki 200.0 21.2 23.0 25.8 40.1 14.2 1.8 2.8 4.6
## 65 parki 273.0 23.0 25.0 28.0 39.6 14.8 2.0 3.0 5.0
## 66 parki 300.0 24.0 26.0 29.0 39.2 14.6 2.0 3.0 5.0
## 67 smelt 6.7 9.3 9.8 10.8 16.1 9.7 0.5 1.0 1.5
## 68 smelt 7.5 10.0 10.5 11.6 17.0 10.0 0.5 1.1 1.6
## 69 smelt 7.0 10.1 10.6 11.6 14.9 9.9 0.5 1.0 1.5
## 70 smelt 9.7 10.4 11.0 12.0 18.3 11.5 0.6 1.0 1.6
## 71 smelt 10.0 11.3 11.8 13.1 16.9 9.8 0.5 1.3 1.8
## 72 smelt 9.9 11.3 11.8 13.1 16.9 8.9 0.5 1.3 1.8
## 73 smelt 9.8 11.4 12.0 13.2 16.7 8.7 0.6 1.2 1.8
## 74 smelt 12.2 11.5 12.2 13.4 15.6 10.4 0.7 1.2 1.9
## 75 smelt 13.4 11.7 12.4 13.5 18.0 9.4 0.7 1.1 1.8
## 76 smelt 12.2 12.1 13.0 13.8 16.5 9.1 0.9 0.8 1.7
## 77 smelt 19.7 13.2 14.3 15.2 18.9 13.6 1.1 0.9 2.0
## 78 smelt 19.9 13.8 15.0 16.2 18.1 11.6 1.2 1.2 2.4
## 79 pike 200.0 30.0 32.3 34.8 16.0 9.7 2.3 2.5 4.8
## 80 pike 300.0 31.7 34.0 37.8 15.1 11.0 2.3 3.8 6.1
## 81 pike 300.0 32.7 35.0 38.8 15.3 11.3 2.3 3.8 6.1
## 82 pike 300.0 34.8 37.3 39.8 15.8 10.1 2.5 2.5 5.0
## 83 pike 430.0 35.5 38.0 40.5 18.0 11.3 2.5 2.5 5.0
## 84 pike 456.0 40.0 42.5 45.5 16.0 9.5 2.5 3.0 5.5
## 85 pike 510.0 40.0 42.5 45.5 15.0 9.8 2.5 3.0 5.5
## 86 pike 540.0 40.1 43.0 45.8 17.0 11.2 2.9 2.8 5.7
## 87 pike 500.0 42.0 45.0 48.0 14.5 10.2 3.0 3.0 6.0
## 88 pike 567.0 43.2 46.0 48.7 16.0 10.0 2.8 2.7 5.5
## 89 pike 770.0 44.8 48.0 51.2 15.0 10.5 3.2 3.2 6.4
## 90 pike 950.0 48.3 51.7 55.1 16.2 11.2 3.4 3.4 6.8
## 91 pike 1250.0 52.0 56.0 59.7 17.9 11.7 4.0 3.7 7.7
## 92 pike 1600.0 56.0 60.0 64.0 15.0 9.6 4.0 4.0 8.0
## 93 pike 1550.0 56.0 60.0 64.0 15.0 9.6 4.0 4.0 8.0
## 94 pike 1650.0 59.0 63.4 68.0 15.9 11.0 4.4 4.6 9.0
## 95 perch 5.9 7.5 8.4 8.8 24.0 16.0 0.9 0.4 1.3
## 96 perch 32.0 12.5 13.7 14.7 24.0 13.6 1.2 1.0 2.2
## 97 perch 40.0 13.8 15.0 16.0 23.9 15.2 1.2 1.0 2.2
## 98 perch 51.5 15.0 16.2 17.2 26.7 15.3 1.2 1.0 2.2
## 99 perch 70.0 15.7 17.4 18.5 24.8 15.9 1.7 1.1 2.8
## 100 perch 100.0 16.2 18.0 19.2 27.2 17.3 1.8 1.2 3.0
## 101 perch 78.0 16.8 18.7 19.4 26.8 16.1 1.9 0.7 2.6
## 102 perch 80.0 17.2 19.0 20.2 27.9 15.1 1.8 1.2 3.0
## 103 perch 85.0 17.8 19.6 20.8 24.7 14.6 1.8 1.2 3.0
## 104 perch 85.0 18.2 20.0 21.0 24.2 13.2 1.8 1.0 2.8
## 105 perch 110.0 19.0 21.0 22.5 25.3 15.8 2.0 1.5 3.5
## 106 perch 115.0 19.0 21.0 22.5 26.3 14.7 2.0 1.5 3.5
## 107 perch 125.0 19.0 21.0 22.5 25.3 16.3 2.0 1.5 3.5
## 108 perch 130.0 19.3 21.3 22.8 28.0 15.5 2.0 1.5 3.5
## 109 perch 120.0 20.0 22.0 23.5 26.0 14.5 2.0 1.5 3.5
## 110 perch 120.0 20.0 22.0 23.5 24.0 15.0 2.0 1.5 3.5
## 111 perch 130.0 20.0 22.0 23.5 26.0 15.0 2.0 1.5 3.5
## 112 perch 135.0 20.0 22.0 23.5 25.0 15.0 2.0 1.5 3.5
## 113 perch 110.0 20.0 22.0 23.5 23.5 17.0 2.0 1.5 3.5
## 114 perch 130.0 20.5 22.5 24.0 24.4 15.1 2.0 1.5 3.5
## 115 perch 150.0 20.5 22.5 24.0 28.3 15.1 2.0 1.5 3.5
## 116 perch 145.0 20.7 22.7 24.2 24.6 15.0 2.0 1.5 3.5
## 117 perch 150.0 21.0 23.0 24.5 21.3 14.8 2.0 1.5 3.5
## 118 perch 170.0 21.5 23.5 25.0 25.1 14.9 2.0 1.5 3.5
## 119 perch 225.0 22.0 24.0 25.5 28.6 14.6 2.0 1.5 3.5
## 120 perch 145.0 22.0 24.0 25.5 25.0 15.0 2.0 1.5 3.5
## 121 perch 188.0 22.6 24.6 26.2 25.7 15.9 2.0 1.6 3.6
## 122 perch 180.0 23.0 25.0 26.5 24.3 13.9 2.0 1.5 3.5
## 123 perch 197.0 23.5 25.6 27.0 24.3 15.7 2.1 1.4 3.5
## 124 perch 218.0 25.0 26.5 28.0 25.6 14.8 1.5 1.5 3.0
## 125 perch 300.0 25.2 27.3 28.7 29.0 17.9 2.1 1.4 3.5
## 126 perch 260.0 25.4 27.5 28.9 24.8 15.0 2.1 1.4 3.5
## 127 perch 265.0 25.4 27.5 28.9 24.4 15.0 2.1 1.4 3.5
## 128 perch 250.0 25.4 27.5 28.9 25.2 15.8 2.1 1.4 3.5
## 129 perch 250.0 25.9 28.0 29.4 26.6 14.3 2.1 1.4 3.5
## 130 perch 300.0 26.9 28.7 30.1 25.2 15.4 1.8 1.4 3.2
## 131 perch 320.0 27.8 30.0 31.6 24.1 15.1 2.2 1.6 3.8
## 132 perch 514.0 30.5 32.8 34.0 29.5 17.7 2.3 1.2 3.5
## 133 perch 556.0 32.0 34.5 36.5 28.1 17.5 2.5 2.0 4.5
## 134 perch 840.0 32.5 35.0 37.3 30.8 20.9 2.5 2.3 4.8
## 135 perch 685.0 34.0 36.5 39.0 27.9 17.6 2.5 2.5 5.0
## 136 perch 700.0 34.0 36.0 38.3 27.7 17.6 2.0 2.3 4.3
## 137 perch 700.0 34.5 37.0 39.4 27.5 15.9 2.5 2.4 4.9
## 138 perch 690.0 34.6 37.0 39.3 26.9 16.2 2.4 2.3 4.7
## 139 perch 900.0 36.5 39.0 41.4 26.9 18.1 2.5 2.4 4.9
## 140 perch 650.0 36.5 39.0 41.4 26.9 14.5 2.5 2.4 4.9
## 141 perch 820.0 36.6 39.0 41.3 30.1 17.8 2.4 2.3 4.7
## 142 perch 850.0 36.9 40.0 42.3 28.2 16.8 3.1 2.3 5.4
## 143 perch 820.0 37.1 40.0 42.5 26.2 15.6 2.9 2.5 5.4
## 144 perch 1100.0 39.0 42.0 44.6 28.7 15.4 3.0 2.6 5.6
## 145 perch 1000.0 39.8 43.0 45.2 26.4 16.1 3.2 2.2 5.4
## 146 perch 1100.0 40.1 43.0 45.5 27.5 16.3 2.9 2.5 5.4
## 147 perch 1000.0 40.2 43.5 46.0 27.4 17.7 3.3 2.5 5.8
## 148 perch 1000.0 41.1 44.0 46.6 26.8 16.3 2.9 2.6 5.5
# First attempt with every predictor. L21, L32 and L31 are differences of L1,
# L2 and L3, so the predictors are linearly dependent and lda() warns about it.
newfish.lda<-lda(Species~.,data=newfish)
## Warning in lda.default(x, grouping, ...): variables are collinear
# Refit without L2, L3 and L31, which removes the exact linear dependencies.
newfish.lda<-lda(Species~Weight+L1+Height+Width+L21+L32,data=newfish)
# Print prior probabilities, group means and discriminant coefficients.
newfish.lda
## Call:
## lda(Species ~ Weight + L1 + Height + Width + L21 + L32, data = newfish)
##
## Prior probabilities of groups:
## bream parki perch pike roach smelt
## 0.22297297 0.06756757 0.36486486 0.10810811 0.12162162 0.08108108
## white
## 0.03378378
##
## Group means:
## Weight L1 Height Width L21 L32
## bream 636.1818 30.60606 39.52727 14.10000 2.8060606 5.272727
## parki 155.8000 18.62000 39.20000 14.18000 1.6100000 2.430000
## perch 360.9333 25.31852 26.17778 15.78519 2.1259259 1.650000
## pike 742.0625 42.88125 15.85625 10.48125 3.0375000 3.281250
## roach 159.1111 20.66667 26.88333 14.57222 1.6388889 2.716667
## smelt 11.5000 11.34167 16.99167 10.21667 0.6916667 1.091667
## white 477.2000 27.82000 29.10000 15.76000 2.4800000 2.960000
##
## Coefficients of linear discriminants:
## LD1 LD2 LD3 LD4 LD5
## Weight 0.000911022 -0.002710071 0.007553399 0.001688806 0.006182751
## L1 0.132200166 0.036926540 -0.259794107 -0.235599786 -0.330471903
## Height -0.618519868 -0.332732865 -0.053863042 -0.330737436 -0.029226039
## Width 0.464670922 -0.341184928 -0.353062958 0.842951264 -0.201141743
## L21 -0.114071841 0.712452136 -2.278059990 0.277900320 2.700516892
## L32 -2.311243186 2.141452146 0.539501848 1.803654269 -0.461925634
## LD6
## Weight -0.003600115
## L1 -0.119589009
## Height -0.019796935
## Width -0.159484049
## L21 2.813216431
## L32 -0.080912628
##
## Proportion of trace:
## LD1 LD2 LD3 LD4 LD5 LD6
## 0.7998 0.1327 0.0473 0.0167 0.0035 0.0000
# Resubstitution check: classify the training rows themselves (first column,
# Species, dropped) and cross-tabulate true species (rows) against the
# predicted species (columns).
newfish.ldapred <- predict(newfish.lda, newfish[-1])
table(newfish$Species, newfish.ldapred$class)
##
## bream parki perch pike roach smelt white
## bream 33 0 0 0 0 0 0
## parki 0 10 0 0 0 0 0
## perch 0 0 54 0 0 0 0
## pike 0 0 0 16 0 0 0
## roach 0 0 0 0 18 0 0
## smelt 0 0 0 0 0 12 0
## white 0 0 0 0 1 0 4
# Leave-one-out cross-validated LDA on the reduced predictor set. With
# CV = TRUE, lda() returns the held-out class (and posterior) for every row
# instead of a fitted model. `TRUE` is written out because `T` is an ordinary
# variable that can be reassigned.
newfish.ldacv <- lda(Species ~ Weight + L1 + Height + Width + L21 + L32,
                     data = newfish, CV = TRUE)
# True species (rows) against cross-validated predictions (columns).
table(newfish$Species, newfish.ldacv$class)
##
## bream parki perch pike roach smelt white
## bream 33 0 0 0 0 0 0
## parki 0 10 0 0 0 0 0
## perch 0 0 54 0 0 0 0
## pike 0 0 0 16 0 0 0
## roach 0 0 0 0 18 0 0
## smelt 0 0 0 0 0 12 0
## white 0 0 0 0 1 0 4
# The leave-one-out cross-validated error rate equals the apparent error rate: 1 of 148 fish misclassified, about 0.7%
# Empty equal-scale frame spanned by the first two linear discriminants.
eqscplot(newfish.ldapred$x, type = "n", xlab = "1st LD", ylab = "2nd LD")
# Plotting codes and colours are looked up from each row's species instead of
# being hard-coded as run lengths (33 bream, 5 white, ...), so they stay
# correct if the rows are reordered or the group sizes change. The order of
# this vector fixes the colour numbers 1..7.
species.codes <- c(bream = "B", white = "W", roach = "R", parki = "Pa",
                   smelt = "S", pike = "Pi", perch = "Pe")
species.names <- as.character(newfish$Species)
fish.species <- unname(species.codes[species.names])
fish.colors <- as.numeric(match(species.names, names(species.codes)))
# Draw each fish as its species code at its (LD1, LD2) position.
text(newfish.ldapred$x[, 1:2], fish.species, col = fish.colors)

# Classify the new (test) fish with the fitted LDA model and show the
# predicted species for each of them.
newfish.ldatest<-predict(newfish.lda,newfish.test)
newfish.ldatest$class
## [1] bream bream perch perch pike smelt smelt parki roach roach white
## Levels: bream parki perch pike roach smelt white
#We see that the results agree with those obtained from the classification tree.
#Let us examine how to apply QDA to this dataset.
# Quadratic Discriminant Analysis ----
#newfish.qda<-qda(Species~.,data=newfish)
# Training data used for QDA; `header = TRUE` is spelled out instead of the
# partially matched `h = T`.
# NOTE(review): the output below reports 143 rows and six species (no "white"),
# so this file is presumably newfish without the five white fish - confirm.
newfish.q <- read.table("file:///C:/Users/Asus/Documents/GitHub/classifng_fish/newfish.qdata.txt", header = TRUE)
library(MVN)
## sROC 0.1-2 loaded
#Running (i) Mardia's; (ii) Henze-Zirkler's and (iii) Royston's Multivariate
#Normality Test:
# (i) Mardia's test. Columns 1, 8, 9 and 10 are dropped so that only Weight,
# L1, L2, L3, Height and Width are tested (see the variable list in the output).
mvn(data = newfish.q[,-c(1,8,9,10)], mvnTest = "ma")
## $multivariateNormality
## Test Statistic p value Result
## 1 Mardia Skewness 426.417978948719 2.01256215659792e-58 NO
## 2 Mardia Kurtosis 1.58569973539399 0.112807439232689 YES
## 3 MVN <NA> <NA> NO
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Shapiro-Wilk Weight 0.8780 <0.001 NO
## 2 Shapiro-Wilk L1 0.9679 0.0019 NO
## 3 Shapiro-Wilk L2 0.9702 0.0033 NO
## 4 Shapiro-Wilk L3 0.9703 0.0033 NO
## 5 Shapiro-Wilk Height 0.9125 <0.001 NO
## 6 Shapiro-Wilk Width 0.9344 <0.001 NO
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th 75th Skew
## Weight 143 398.02378 360.51374 272.0 5.9 1650.0 120.00 650.0 1.1175913
## L1 143 26.27692 10.16502 25.2 7.5 59.0 19.00 32.6 0.6215598
## L2 143 28.44406 10.88848 27.3 8.4 63.4 20.90 35.0 0.5796666
## L3 143 31.25455 11.82529 29.2 8.8 68.0 22.80 39.6 0.4306508
## Height 143 28.33217 8.39113 26.8 14.5 44.5 24.25 37.8 0.1335468
## Width 143 14.07063 2.23310 14.6 8.7 20.9 13.40 15.3 -0.4962807
## Kurtosis
## Weight 0.89652958
## L1 0.35891664
## L2 0.32535350
## L3 -0.02200252
## Height -1.07931595
## Width 0.27826140
# (ii) Henze-Zirkler's test on the same six measurement columns.
mvn(data = newfish.q[,-c(1,8,9,10)], mvnTest = "hz")
## $multivariateNormality
## Test HZ p value MVN
## 1 Henze-Zirkler 4.496681 0 NO
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Shapiro-Wilk Weight 0.8780 <0.001 NO
## 2 Shapiro-Wilk L1 0.9679 0.0019 NO
## 3 Shapiro-Wilk L2 0.9702 0.0033 NO
## 4 Shapiro-Wilk L3 0.9703 0.0033 NO
## 5 Shapiro-Wilk Height 0.9125 <0.001 NO
## 6 Shapiro-Wilk Width 0.9344 <0.001 NO
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th 75th Skew
## Weight 143 398.02378 360.51374 272.0 5.9 1650.0 120.00 650.0 1.1175913
## L1 143 26.27692 10.16502 25.2 7.5 59.0 19.00 32.6 0.6215598
## L2 143 28.44406 10.88848 27.3 8.4 63.4 20.90 35.0 0.5796666
## L3 143 31.25455 11.82529 29.2 8.8 68.0 22.80 39.6 0.4306508
## Height 143 28.33217 8.39113 26.8 14.5 44.5 24.25 37.8 0.1335468
## Width 143 14.07063 2.23310 14.6 8.7 20.9 13.40 15.3 -0.4962807
## Kurtosis
## Weight 0.89652958
## L1 0.35891664
## L2 0.32535350
## L3 -0.02200252
## Height -1.07931595
## Width 0.27826140
# (iii) Royston's test on the same six measurement columns.
mvn(data = newfish.q[,-c(1,8,9,10)], mvnTest = "royston")
## $multivariateNormality
## Test H p value MVN
## 1 Royston 46.25164 2.211833e-10 NO
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Shapiro-Wilk Weight 0.8780 <0.001 NO
## 2 Shapiro-Wilk L1 0.9679 0.0019 NO
## 3 Shapiro-Wilk L2 0.9702 0.0033 NO
## 4 Shapiro-Wilk L3 0.9703 0.0033 NO
## 5 Shapiro-Wilk Height 0.9125 <0.001 NO
## 6 Shapiro-Wilk Width 0.9344 <0.001 NO
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th 75th Skew
## Weight 143 398.02378 360.51374 272.0 5.9 1650.0 120.00 650.0 1.1175913
## L1 143 26.27692 10.16502 25.2 7.5 59.0 19.00 32.6 0.6215598
## L2 143 28.44406 10.88848 27.3 8.4 63.4 20.90 35.0 0.5796666
## L3 143 31.25455 11.82529 29.2 8.8 68.0 22.80 39.6 0.4306508
## Height 143 28.33217 8.39113 26.8 14.5 44.5 24.25 37.8 0.1335468
## Width 143 14.07063 2.23310 14.6 8.7 20.9 13.40 15.3 -0.4962807
## Kurtosis
## Weight 0.89652958
## L1 0.35891664
## L2 0.32535350
## L3 -0.02200252
## Height -1.07931595
## Width 0.27826140
#newfish.qda<-qda(Species~.,data=newfish.q)
# Fit QDA on the same six predictors as the reduced LDA model.
newfish.qda <- qda(
  Species ~ Weight + L1 + Height + Width + L21 + L32,
  data = newfish.q
)
# Resubstitution predictions for the QDA training rows.
newfish.qdapred <- predict(newfish.qda, newfish.q)
# Predicted species for the test fish.
predict(newfish.qda, newfish.test)$class
## [1] bream bream perch perch pike smelt smelt parki roach roach perch
## Levels: bream parki perch pike roach smelt
# Leave-one-out cross-validated QDA. With CV = TRUE, qda() returns the held-out
# class (and posterior) for every row rather than a fitted model; `TRUE` is
# written out because `T` can be reassigned.
# NOTE(review): this overwrites the fitted model stored in `newfish.qda`, so
# predict() can no longer be called on it after this line.
newfish.qda <- qda(Species ~ Weight + L1 + Height + Width + L21 + L32,
                   data = newfish.q, CV = TRUE)
# True species (rows) against cross-validated predictions (columns).
table(newfish.q$Species, newfish.qda$class)
##
## bream parki perch pike roach smelt
## bream 33 0 0 0 0 0
## parki 0 10 0 0 0 0
## perch 0 0 54 0 0 0
## pike 0 0 0 16 0 0
## roach 0 0 1 0 17 0
## smelt 0 0 1 0 0 11
# Nearest Neighbor Methods ----
library(class)
# 3-nearest-neighbour classification of the training rows against themselves
# (training set and test set are the same nine predictor columns), i.e. an
# apparent / resubstitution error estimate. `prob = TRUE` replaces `prob = T`.
# NOTE(review): the columns are used unscaled, so Weight (values up to 1650)
# dominates the distance over the length columns (values below 70).
newfish.knn <- knn(newfish[, 2:10], newfish[, 2:10], newfish[, "Species"],
                   k = 3, prob = TRUE)
table(newfish$Species, newfish.knn)
## newfish.knn
## bream parki perch pike roach smelt white
## bream 30 1 2 0 0 0 0
## parki 0 5 2 0 3 0 0
## perch 4 0 47 0 2 1 0
## pike 1 0 3 12 0 0 0
## roach 1 0 9 0 7 0 1
## smelt 0 0 0 0 0 12 0
## white 0 0 2 0 0 0 3
#We see that the apparent error rate for k = 3 is about 22% (32 of 148 misclassified). For k = 2, we have:
# Same resubstitution check with k = 2 neighbours; `prob = TRUE` replaces the
# reassignable alias `T`.
newfish.knn <- knn(newfish[, 2:10], newfish[, 2:10], newfish[, "Species"],
                   k = 2, prob = TRUE)
table(newfish$Species, newfish.knn)
## newfish.knn
## bream parki perch pike roach smelt white
## bream 29 0 2 0 2 0 0
## parki 0 7 0 0 2 0 1
## perch 2 0 47 0 4 1 0
## pike 2 0 3 11 0 0 0
## roach 0 0 5 0 12 0 1
## smelt 0 0 0 0 0 12 0
## white 0 0 1 0 0 0 4
# k = 1: every training row is compared with the training set that contains
# it, so its nearest neighbour is itself and the table below is perfect by
# construction; it says nothing about out-of-sample error.
# `prob = TRUE` replaces the reassignable alias `T`.
newfish.knn <- knn(newfish[, 2:10], newfish[, 2:10], newfish[, "Species"],
                   k = 1, prob = TRUE)
table(newfish$Species, newfish.knn)
## newfish.knn
## bream parki perch pike roach smelt white
## bream 33 0 0 0 0 0 0
## parki 0 10 0 0 0 0 0
## perch 0 0 54 0 0 0 0
## pike 0 0 0 16 0 0 0
## roach 0 0 0 0 18 0 0
## smelt 0 0 0 0 0 12 0
## white 0 0 0 0 0 0 5
# Reduced frame for k-NN: Species plus Weight, L1, Height, L21 and L32.
# Columns are picked by name (they are positions 1, 2, 3, 6, 8 and 9 of
# newfish) so the selection does not depend on column order.
newfish1 <- newfish[, c("Species", "Weight", "L1", "Height", "L21", "L32")]
# Leave-one-out cross-validated 1-nearest-neighbour classification of the
# training rows; `prob = TRUE` replaces the reassignable alias `T`.
newfish.knncv <- knn.cv(newfish1[, 2:6], newfish1[, "Species"], k = 1, prob = TRUE)
table(newfish1$Species, newfish.knncv)
## newfish.knncv
## bream parki perch pike roach smelt white
## bream 26 0 4 0 2 0 1
## parki 1 4 0 0 4 0 1
## perch 3 0 37 0 11 1 2
## pike 2 0 4 9 0 0 1
## roach 2 0 10 0 5 0 1
## smelt 0 0 0 0 0 12 0
## white 0 0 3 0 0 0 2
# Test-set features for k-NN, picked by name so they are guaranteed to be the
# same five predictors, in the same order, as the training columns
# newfish1[, 2:6] (Weight, L1, Height, L21, L32). The positional indices used
# before (1, 2, 5, 7, 8) differ from the training ones and silently select the
# wrong columns if the test file's layout is not exactly as assumed.
newfish1.test <- newfish.test[, c("Weight", "L1", "Height", "L21", "L32")]
# 1-nearest-neighbour prediction for the test fish; `prob = TRUE` replaces `T`.
newfish.knntest <- knn(newfish1[, 2:6], newfish1.test, newfish1[, "Species"],
                       k = 1, prob = TRUE)
newfish.knntest
## [1] bream bream perch white perch smelt smelt parki perch perch perch
## attr(,"prob")
## [1] 1 1 1 1 1 1 1 1 1 1 1
## Levels: bream parki perch pike roach smelt white
# Logistic Discrimination ----
library(nnet)
# Multinomial logistic regression of Species on every predictor in newfish,
# allowing up to 250 optimiser iterations.
# NOTE(review): the trace below ends at a final value of 0 and the residual
# deviance is about 2e-11, i.e. the training data are fitted perfectly; the
# coefficient values should therefore be interpreted with care.
newfish.logd<-multinom(Species~.,data=newfish,maxit=250)
## # weights: 77 (60 variable)
## initial value 287.994702
## iter 10 value 189.100680
## iter 20 value 82.739762
## iter 30 value 15.668415
## iter 40 value 0.165377
## iter 50 value 0.003851
## final value 0.000000
## converged
# Print the fitted coefficients (one row per non-reference species), the
# residual deviance and the AIC.
newfish.logd
## Call:
## multinom(formula = Species ~ ., data = newfish, maxit = 250)
##
## Coefficients:
## (Intercept) Weight L1 L2 L3 Height
## parki -29.45533 0.02917110 6.349592 17.8259067 -23.500970 9.645257
## perch -80.11405 0.16021628 3.267803 56.6489218 -53.765483 6.684178
## pike 15.22567 -0.05874368 8.093673 0.9753102 -3.095179 -13.084687
## roach -277.16410 -0.51539078 54.195310 -43.6844449 4.362472 -2.952463
## smelt 455.64639 0.18459382 29.363751 -20.5072505 -10.290211 -13.228223
## white -57.01255 0.19991067 -17.467222 31.7667561 -20.454096 -4.118171
## Width L21 L32 L31
## parki 3.247584 11.476314 -41.326877 -29.850563
## perch 21.052273 53.381119 -110.414404 -57.033286
## pike 21.652958 -7.118363 -4.070489 -11.188852
## roach 40.080837 -97.879755 48.046917 -49.832838
## smelt 18.368009 -49.871001 10.217040 -39.653961
## white 26.549555 49.233978 -52.220852 -2.986874
##
## Residual Deviance: 2.009681e-11
## AIC: 84
# Resubstitution table: true species (rows) against the species predicted by
# the multinomial model for the same training rows (columns).
table(newfish$Species,predict(newfish.logd,newfish))
##
## bream parki perch pike roach smelt white
## bream 33 0 0 0 0 0 0
## parki 0 10 0 0 0 0 0
## perch 0 0 54 0 0 0 0
## pike 0 0 0 16 0 0 0
## roach 0 0 0 0 18 0 0
## smelt 0 0 0 0 0 12 0
## white 0 0 0 0 0 0 5
library(glmnet)
## Loading required package: Matrix
## Loading required package: foreach
## Loaded glmnet 2.0-16
# Predictor matrix (every column except Species) and response for glmnet.
x <- as.matrix(newfish[, -1])
y <- newfish$Species
# Penalised multinomial regression with lambda chosen by cross-validated
# misclassification error. One fold per row gives leave-one-out
# cross-validation; nrow(x) (148 for this data set) replaces the hard-coded
# count so this stays true if the training set changes size.
cvfit <- cv.glmnet(x, y, family = "multinomial", type.measure = "class",
                   nfolds = nrow(x))
# Predicted class for each training row at the lambda with minimum CV error.
predict.value <- predict(cvfit, x, s = "lambda.min", type = "class")
# Predicted species (rows) against true species (columns).
table(predict.value, newfish$Species)
##
## predict.value bream parki perch pike roach smelt white
## bream 33 0 0 0 0 0 0
## parki 0 10 0 0 0 0 0
## perch 0 0 54 0 0 0 0
## pike 0 0 0 16 0 0 0
## roach 0 0 0 0 18 0 0
## smelt 0 0 0 0 0 12 0
## white 0 0 0 0 0 0 5
# Predicted species for the test fish from the multinomial logistic model.
predict(newfish.logd,newfish.test)
## [1] bream bream perch perch pike smelt smelt parki roach roach white
## Levels: bream parki perch pike roach smelt white