Prana Ugiana Gio
Elly Rosmaini Siregar
Faigiziduhu Bu’ulolo
Department of Mathematics
University of Sumatera Utara
3rd, International Seminar on Operational Research
Medan, August 21-23, 2015
LDA (linear discriminant analysis) and classification trees can be used to predict the class (group) to which a person or object belongs.
LDA produces a discriminant function, which is a linear function of the predictor variables. This function can be used to classify an object into one of the groups of the dependent variable. Consider the following data.
# Read or open data "plot interior.csv" (looked up in the working directory).
# stringsAsFactors = TRUE keeps the class label Y a factor on R >= 4.0.0 too,
# where read.csv() no longer converts strings to factors by default; the
# confusionMatrix() call further down needs Y to be a factor.
data <- read.csv("plot interior.csv", stringsAsFactors = TRUE)
# Show the data
data
## Y X1 X2
## 1 A 1.0 0.5
## 2 A 1.2 0.6
## 3 A 1.4 0.5
## 4 B 1.5 0.6
## 5 A 2.0 0.6
## 6 A 2.5 0.9
## 7 A 2.3 1.1
## 8 A 2.4 1.1
## 9 A 2.1 0.6
## 10 A 1.9 0.8
## 11 B 1.0 0.8
## 12 B 1.2 1.0
## 13 B 1.4 1.2
## 14 B 1.5 1.0
## 15 B 2.0 1.7
## 16 A 2.5 2.2
## 17 B 2.3 2.0
## 18 B 2.4 2.1
## 19 B 2.1 1.9
## 20 B 1.9 1.5
# Plot Data: X1 against X2, with point colour and shape given by the class Y.
library(ggplot2)
# The two captions are fixed annotations rather than data-driven layers, so
# they are added with annotate(): geom_text() with constant x / y / label
# draws one copy of the label per row of `data`, which overplots the text.
ggplot(data, aes(X1, X2)) +
  geom_point(aes(color = Y, shape = Y)) +
  annotate(
    "text", x = 1.6, y = 2, colour = "blue",
    label = "In general, green point (triangle) is positioned above,"
  ) +
  annotate(
    "text", x = 1.5, y = 1.9, colour = "blue",
    label = "whereas red point (circle) is positioned below"
  )
#Perform LDA
# Load package 'MASS' (used here for lda())
library(MASS)
## Warning: package 'MASS' was built under R version 3.2.2
# Fit a linear discriminant analysis of the class Y on predictors X1 and X2
fit.LDA <- lda(Y ~ X1 + X2, data = data)
# Print the fitted model
fit.LDA
## Call:
## lda(Y ~ X1 + X2, data = data)
##
## Prior probabilities of groups:
## A B
## 0.5 0.5
##
## Group means:
## X1 X2
## A 1.93 0.89
## B 1.73 1.38
##
## Coefficients of linear discriminants:
## LD1
## X1 -2.683033
## X2 2.937740
# Perform classification: predict the class of every observation from its
# two predictor columns using the fitted discriminant function
class.LDA.C <- predict(fit.LDA, newdata = data[, c("X1", "X2")])[["class"]]
class.LDA.C
## [1] B B A A A A A A A A B B B B B B B B B B
## Levels: A B
# Cross-tabulate the actual class (rows) against the LDA prediction (columns)
table(data[["Y"]], class.LDA.C)
## class.LDA.C
## A B
## A 7 3
## B 1 9
# Perform classification trees
library(rpart)
library(rpart.plot)
# Grow a tree for the class Y from predictors X1 and X2
tree <- rpart(Y ~ X1 + X2, data = data)
# Draw the fitted tree
prp(tree, faclen = 0, cex = 0.8, extra = 1)
# Prediction: predicted class for every observation in `data`
treePrediction <- predict(tree, newdata = data, type = "class")
treePrediction
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## A A A A A A A A A A A A B A B B B B B B
## Levels: A B
# Load package caret
library(caret)
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.2.2
# Compare the tree predictions with the actual classes. confusionMatrix()
# requires both arguments to be factors with the same levels, and Y may be a
# character column (read.csv() no longer creates factors by default since
# R 4.0.0), so the reference is coerced explicitly.
confusionMatrix(
  treePrediction,
  factor(data$Y, levels = levels(treePrediction))
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B
## A 9 4
## B 1 6
##
## Accuracy : 0.75
## 95% CI : (0.509, 0.9134)
## No Information Rate : 0.5
## P-Value [Acc > NIR] : 0.02069
##
## Kappa : 0.5
## Mcnemar's Test P-Value : 0.37109
##
## Sensitivity : 0.9000
## Specificity : 0.6000
## Pos Pred Value : 0.6923
## Neg Pred Value : 0.8571
## Prevalence : 0.5000
## Detection Rate : 0.4500
## Detection Prevalence : 0.6500
## Balanced Accuracy : 0.7500
##
## 'Positive' Class : A
##
## Warning: package 'class' was built under R version 3.2.2
## Y X1 X2
## 1 A 0.10 2.10
## 2 A 0.20 2.20
## 3 A 0.30 2.30
## 4 A 0.40 2.40
## 5 A 0.50 2.50
## 6 A 0.60 2.60
## 7 A 0.70 2.70
## 8 A 0.80 2.80
## 9 A 0.90 2.90
## 10 A 0.10 3.00
## 11 A 0.20 3.10
## 12 A 0.30 3.20
## 13 A 0.40 3.30
## 14 A 0.50 3.40
## 15 A 0.60 3.50
## 16 A 0.70 3.60
## 17 A 0.80 3.70
## 18 A 0.90 3.80
## 19 A 0.21 3.90
## 20 A 0.15 3.71
## 21 A 0.65 2.14
## 22 A 0.12 2.17
## 23 A 0.11 2.53
## 24 A 0.12 2.42
## 25 A 0.74 2.49
## 26 A 0.43 3.21
## 27 A 0.23 3.94
## 28 A 0.17 2.67
## 29 A 0.58 3.72
## 30 A 0.95 2.19
## 31 A 0.76 2.53
## 32 A 0.45 2.21
## 33 A 0.76 3.64
## 34 A 0.23 3.32
## 35 A 0.77 3.16
## 36 A 0.87 3.19
## 37 A 0.73 2.89
## 38 A 0.21 2.24
## 39 A 0.04 3.51
## 40 A 0.16 2.13
## 41 A 1.10 0.10
## 42 A 1.20 0.20
## 43 A 1.30 0.30
## 44 A 1.40 0.40
## 45 A 1.50 0.50
## 46 A 1.60 0.60
## 47 A 1.70 0.70
## 48 A 1.80 0.80
## 49 A 1.90 0.90
## 50 A 1.10 0.12
## 51 A 1.20 0.27
## 52 A 1.30 0.36
## 53 A 1.40 0.48
## 54 A 1.50 0.79
## 55 A 1.60 0.15
## 56 A 1.70 1.27
## 57 A 1.80 1.93
## 58 A 1.90 0.30
## 59 A 1.21 1.54
## 60 A 1.15 1.71
## 61 A 1.65 1.14
## 62 A 1.12 1.17
## 63 A 1.11 1.53
## 64 A 1.12 1.42
## 65 A 1.74 1.49
## 66 A 1.43 1.21
## 67 A 1.23 1.94
## 68 A 1.17 1.67
## 69 A 1.58 1.72
## 70 A 1.95 1.19
## 71 A 1.76 1.53
## 72 A 1.45 1.21
## 73 A 1.76 1.64
## 74 A 1.23 1.32
## 75 A 1.77 1.16
## 76 A 1.87 1.19
## 77 A 1.73 1.89
## 78 A 1.21 1.24
## 79 A 1.04 1.51
## 80 A 1.16 1.13
## 81 A 2.10 2.10
## 82 A 2.20 2.20
## 83 A 2.30 2.30
## 84 A 2.40 2.40
## 85 A 2.50 2.50
## 86 A 2.60 2.60
## 87 A 2.70 2.70
## 88 A 2.80 2.80
## 89 A 2.90 2.90
## 90 A 2.10 3.00
## 91 A 2.20 3.10
## 92 A 2.30 3.20
## 93 A 2.40 3.30
## 94 A 2.50 3.40
## 95 A 2.60 3.50
## 96 A 2.70 3.60
## 97 A 2.80 3.70
## 98 A 2.90 3.80
## 99 A 2.21 3.90
## 100 A 2.15 3.71
## 101 A 2.65 2.14
## 102 A 2.12 2.17
## 103 A 2.11 2.53
## 104 A 2.12 2.42
## 105 A 2.74 2.49
## 106 A 2.43 3.21
## 107 A 2.23 3.94
## 108 A 2.17 2.67
## 109 A 2.58 3.72
## 110 A 2.95 2.19
## 111 A 2.76 2.53
## 112 A 2.45 2.21
## 113 A 2.76 3.64
## 114 A 2.23 3.32
## 115 A 2.77 3.16
## 116 A 2.87 3.19
## 117 A 2.73 2.89
## 118 A 2.21 2.24
## 119 A 2.04 3.51
## 120 A 2.16 2.13
## 121 B 0.10 0.10
## 122 B 0.20 0.20
## 123 B 0.30 0.30
## 124 B 0.40 0.40
## 125 B 0.50 0.50
## 126 B 0.60 0.60
## 127 B 0.70 0.70
## 128 B 0.80 0.80
## 129 B 0.90 0.90
## 130 B 0.10 0.12
## 131 B 0.20 0.27
## 132 B 0.30 0.36
## 133 B 0.40 0.48
## 134 B 0.50 0.79
## 135 B 0.60 0.15
## 136 B 0.70 1.27
## 137 B 0.80 1.93
## 138 B 0.90 0.30
## 139 B 0.21 1.54
## 140 B 0.15 1.71
## 141 B 0.65 1.14
## 142 B 0.12 1.17
## 143 B 0.11 1.53
## 144 B 0.12 1.42
## 145 B 0.74 1.49
## 146 B 0.43 1.21
## 147 B 0.23 1.94
## 148 B 0.17 1.67
## 149 B 0.58 1.72
## 150 B 0.95 1.19
## 151 B 0.76 1.53
## 152 B 0.45 1.21
## 153 B 0.76 1.64
## 154 B 0.23 1.32
## 155 B 0.77 1.16
## 156 B 0.87 1.19
## 157 B 0.73 1.89
## 158 B 0.21 1.24
## 159 B 0.04 1.51
## 160 B 0.16 1.13
## 161 B 1.10 2.10
## 162 B 1.20 2.20
## 163 B 1.30 2.30
## 164 B 1.40 2.40
## 165 B 1.50 2.50
## 166 B 1.60 2.60
## 167 B 1.70 2.70
## 168 B 1.80 2.80
## 169 B 1.90 2.90
## 170 B 1.10 3.00
## 171 B 1.20 3.10
## 172 B 1.30 3.20
## 173 B 1.40 3.30
## 174 B 1.50 3.40
## 175 B 1.60 3.50
## 176 B 1.70 3.60
## 177 B 1.80 3.70
## 178 B 1.90 3.80
## 179 B 1.21 3.90
## 180 B 1.15 3.71
## 181 B 1.65 2.14
## 182 B 1.12 2.17
## 183 B 1.11 2.53
## 184 B 1.12 2.42
## 185 B 1.74 2.49
## 186 B 1.43 3.21
## 187 B 1.23 3.94
## 188 B 1.17 2.67
## 189 B 1.58 3.72
## 190 B 1.95 2.19
## 191 B 1.76 2.53
## 192 B 1.45 2.21
## 193 B 1.76 3.64
## 194 B 1.23 3.32
## 195 B 1.77 3.16
## 196 B 1.87 3.19
## 197 B 1.73 2.89
## 198 B 1.21 2.24
## 199 B 1.04 3.51
## 200 B 1.16 2.13
## 201 B 2.10 0.10
## 202 B 2.20 0.20
## 203 B 2.30 0.30
## 204 B 2.40 0.40
## 205 B 2.50 0.50
## 206 B 2.60 0.60
## 207 B 2.70 0.70
## 208 B 2.80 0.80
## 209 B 2.90 0.90
## 210 B 2.10 0.12
## 211 B 2.20 0.27
## 212 B 2.30 0.36
## 213 B 2.40 0.48
## 214 B 2.50 0.79
## 215 B 2.60 0.15
## 216 B 2.70 1.27
## 217 B 2.80 1.93
## 218 B 2.90 0.30
## 219 B 2.21 1.54
## 220 B 2.15 1.71
## 221 B 2.65 1.14
## 222 B 2.12 1.17
## 223 B 2.11 1.53
## 224 B 2.12 1.42
## 225 B 2.74 1.49
## 226 B 2.43 1.21
## 227 B 2.23 1.94
## 228 B 2.17 1.67
## 229 B 2.58 1.72
## 230 B 2.95 1.19
## 231 B 2.76 1.53
## 232 B 2.45 1.21
## 233 B 2.76 1.64
## 234 B 2.23 1.32
## 235 B 2.77 1.16
## 236 B 2.87 1.19
## 237 B 2.73 1.89
## 238 B 2.21 1.24
## 239 B 2.04 1.51
## 240 B 2.16 1.13
## Call:
## lda(Y ~ X1 + X2, data = data1)
##
## Prior probabilities of groups:
## A B
## 0.5 0.5
##
## Group means:
## X1 X2
## A 1.461 2.297833
## B 1.461 1.670417
##
## Coefficients of linear discriminants:
## LD1
## X1 0.02315805
## X2 -0.94483701
## [1] A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A
## [36] A A A A A B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B
## [71] B B B B B B B B B B A A A A A A A A A A A A A A A A A A A A A A A A A
## [106] A A A A A A A A A A A A A A A B B B B B B B B B B B B B B B B B B B B
## [141] B B B B B B B B B B B B B B B B B B B B A A A A A A A A A A A A A A A
## [176] A A A A A A A A A A A A A A A A A A A A A A A A A B B B B B B B B B B
## [211] B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B
## Levels: A B
## class.LDA.C
## A B
## A 80 40
## B 40 80
# Perform classification trees
library(rpart)
library(rpart.plot)
# Grow a tree for the class Y from predictors X1 and X2 on the second data set
tree <- rpart(Y ~ X1 + X2, data = data1)
# Draw the fitted tree
prp(tree, faclen = 0, cex = 0.8, extra = 1)
# Prediction: predicted class for every observation in `data1`
treePrediction <- predict(tree, newdata = data1, type = "class")
treePrediction
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## A A A A A A A A A A A A A A A A A A
## 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## A A A A A A A A A A A A A A A A A A
## 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## A A A A A A A A A A A A A A A A A A
## 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## A A A A A A A A A A A A A A A A A A
## 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## A A A A A A A A A A A A A A A A A A
## 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
## A A A A A A A A A A A A A A A A A A
## 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## A A A A A A A A A A A A B B B B B B
## 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## B B B B B B B B B B B B B B B B B B
## 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## B B B B B B B B B B B B B B B B B B
## 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## B B B B B B B B B B B B B B B B B B
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## B B B B B B B B B B B B B B B B B B
## 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
## B B B B B B B B B B B B B B B B B B
## 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
## B B B B B B B B B B B B B B B B B B
## 235 236 237 238 239 240
## B B B B B B
## Levels: A B
# Load package caret
library(caret)
# Compare the tree predictions with the actual classes of `data1`.
# confusionMatrix() requires both arguments to be factors with the same
# levels; the reference is coerced explicitly because data1$Y may be a
# character column (e.g. when read with read.csv() on R >= 4.0.0).
confusionMatrix(
  treePrediction,
  factor(data1$Y, levels = levels(treePrediction))
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B
## A 120 0
## B 0 120
##
## Accuracy : 1
## 95% CI : (0.9847, 1)
## No Information Rate : 0.5
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 1
## Mcnemar's Test P-Value : NA
##
## Sensitivity : 1.0
## Specificity : 1.0
## Pos Pred Value : 1.0
## Neg Pred Value : 1.0
## Prevalence : 0.5
## Detection Rate : 0.5
## Detection Prevalence : 0.5
## Balanced Accuracy : 1.0
##
## 'Positive' Class : A
##
One of the factors that affects the accuracy of classification predictions is the spread of the data.