A Comparison of Classifiers

Synthetic Classification Problem

The data set is included in the MASS library. Sources:

Ripley, B.D. (1994) Neural networks and related methods for classification (with discussion). Journal of the Royal Statistical Society series B 56, 409–456.

Ripley, B.D. (1996) Pattern Recognition and Neural Networks. Cambridge: Cambridge University Press.

Plot the training set:

library(MASS)
library(ggplot2)
data("synth.tr")
data("synth.te")

# Code the class label as a factor for the classifiers below
synth.tr$yc <- as.factor(synth.tr$yc)
synth.te$yc <- as.factor(synth.te$yc)

# Grid covering the feature space, used below to visualize decision regions
grid <- expand.grid(
  xs = seq(-1.5, 1, length = 100),
  ys = seq(-0.2, 1.2, length = 100)
)

ggplot(aes(x = xs, y = ys, color = yc), data = synth.tr) +
  geom_point() 

Logistic Regression

fit.glm <- glm(yc ~ ., data = synth.tr, family = "binomial")
pred <- ifelse(predict(fit.glm, newdata = synth.te, type = "response") > 0.5, "1", "0")
table(pred,  synth.te$yc)
##     
## pred   0   1
##    0 459  73
##    1  41 427
mean(pred != synth.te$yc) 
## [1] 0.114
grid$pred.glm <- ifelse(predict(fit.glm, newdata = grid, type = "response") > 0.5, "1", "0")
ggplot(aes(x = xs, y = ys, color = yc), data = synth.tr) +
  geom_point() +
  geom_point(aes(color = pred.glm), data = grid, alpha = .1)
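
The semi-transparent grid of points works, but drawing the fitted probability's 0.5 contour shows the boundary itself more crisply. A sketch (the p.glm column is added to grid just for this plot):

# Sketch: the logistic-regression decision boundary as the p = 0.5 contour
grid$p.glm <- predict(fit.glm, newdata = grid, type = "response")
ggplot(synth.tr, aes(x = xs, y = ys)) +
  geom_point(aes(color = yc)) +
  geom_contour(aes(z = p.glm), data = grid, breaks = 0.5, color = "black")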

Extend the GLM with generalized additive models

Use smoothing splines:

library(gam)
## Loading required package: splines
## Loading required package: foreach
## Loaded gam 1.14-4
fit.gam <- gam(yc ~ s(xs, df = 5) + s(ys, df = 5), data = synth.tr,
               family = "binomial")
pred <- ifelse(predict(fit.gam, synth.te, type = "response") > 0.5, "1", "0")
table(pred,  synth.te$yc)
##     
## pred   0   1
##    0 457  46
##    1  43 454
mean(pred != synth.te$yc) # test error
## [1] 0.089
grid$pred.gam <- ifelse(predict(fit.gam, grid, type = "response") > 0.5, "1", "0")

ggplot(aes(x = xs, y = ys, color = yc), data = synth.tr) +
  geom_point() +
  geom_point(aes(color = pred.gam), data = grid, alpha = .1)

anova(fit.glm, fit.gam, test = "Chisq")
## Analysis of Deviance Table
## 
## Model 1: yc ~ xs + ys
## Model 2: yc ~ s(xs, df = 5) + s(ys, df = 5)
##   Resid. Df Resid. Dev     Df Deviance  Pr(>Chi)    
## 1       247     161.42                              
## 2       239     120.55 7.9997   40.871 2.203e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
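
The anova confirms that the smooth terms improve substantially on the linear fit. Before simplifying the model, it helps to check which smooths are actually nonlinear; gam's summary method reports approximate tests for each nonparametric effect (a quick diagnostic, output not shown):

# Sketch: check which smooth terms are really needed
summary(fit.gam)          # includes an "Anova for Nonparametric Effects" table
plot(fit.gam, se = TRUE)  # plot each fitted smooth with standard-error bands

If the smooth in ys looks close to linear, as the refit below assumes, we can replace it with a plain linear term: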
fit.gam <- gam(yc ~ s(xs, df = 5) + ys, data = synth.tr,
               family = "binomial")
pred <- ifelse(predict(fit.gam, synth.te, type = "response") > 0.5, "1", "0")
table(pred,  synth.te$yc)
##     
## pred   0   1
##    0 459  49
##    1  41 451
mean(pred != synth.te$yc) # test error
## [1] 0.09
grid$pred.gam <- ifelse(predict(fit.gam, grid, type = "response") > 0.5, "1", "0")

ggplot(aes(x = xs, y = ys, color = yc), data = synth.tr) +
  geom_point() +
  geom_point(aes(color = pred.gam), data = grid, alpha = .1)

Trees

library(tree)
## Warning: package 'tree' was built under R version 3.4.4
fit.tree <- tree(yc ~ ., data = synth.tr)
plot(fit.tree)
text(fit.tree, cex = 0.5, pretty = 0)

pred <- predict(fit.tree, newdata = synth.te, type="class") 
table(pred,  synth.te$yc)
##     
## pred   0   1
##    0 442  97
##    1  58 403
mean(pred != synth.te$yc) 
## [1] 0.155
grid$pred.tree <- predict(fit.tree, newdata = grid, type = "class")

ggplot(aes(x = xs, y = ys, color = yc), data = synth.tr) +
  geom_point() +
  geom_point(aes(color = pred.tree), data = grid, alpha = .1)

Use cross-validation to prune the tree.

cvtree <- cv.tree(fit.tree, FUN = prune.misclass)
plot(cvtree$size, cvtree$dev, type = "b")

w <- which.min(cvtree$dev)
cvtree$size[w]
## [1] 6
# Note: CV selects size 6, but here we prune to just 2 terminal nodes
tree1 <- prune.tree(fit.tree, best = 2)

pred <- predict(tree1, newdata = synth.te, type="class") 
table(pred,  synth.te$yc)
##     
## pred   0   1
##    0 443  62
##    1  57 438
mean(pred != synth.te$yc) 
## [1] 0.119
grid$pred.tree1 <-  predict(tree1, newdata = grid, type = "class")

ggplot(aes(x = xs, y = ys, color = yc), data = synth.tr) +
  geom_point() +
  geom_point(aes(color = pred.tree1), data = grid, alpha = .1)
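
For comparison, one could instead prune to the CV-selected size with prune.misclass, matching the misclassification criterion used in cv.tree (a sketch, results not shown):

# Sketch: prune to the size chosen by cross-validation (6 here)
tree.cv <- prune.misclass(fit.tree, best = cvtree$size[w])
mean(predict(tree.cv, newdata = synth.te, type = "class") != synth.te$yc)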

Random Forest

library(randomForest)
fit.rf <- randomForest(yc ~ ., data = synth.tr)
pred <- predict(fit.rf, synth.te)
table(pred,  synth.te$yc)
##     
## pred   0   1
##    0 457  60
##    1  43 440
mean(pred != synth.te$yc) # test error
## [1] 0.103
grid$pred.rf <- predict(fit.rf, newdata = grid)

ggplot(aes(x = xs, y = ys, color = yc), data = synth.tr) +
  geom_point() +
  geom_point(aes(color = pred.rf), data = grid, alpha = .1)

ggplot(aes(x = xs, y = ys, color = yc), data = synth.te) +
  geom_point() +
  geom_point(aes(color = pred.rf), data = grid, alpha = .1)

We can see from the plots that the random forest overfits: the fitted decision regions chase individual training points rather than the broad class structure.
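
A quick numeric check is to compare the resubstitution error with the out-of-bag (OOB) estimate that randomForest computes as it grows the forest (a minimal sketch):

# Sketch: training (resubstitution) error vs. out-of-bag estimate
mean(predict(fit.rf, synth.tr) != synth.tr$yc)  # near zero when overfitting
fit.rf$err.rate[fit.rf$ntree, "OOB"]            # honest OOB error estimate

A large gap confirms overfitting; raising the nodesize argument (the value 10 below is purely illustrative) grows shallower trees and smooths the decision regions:

fit.rf2 <- randomForest(yc ~ ., data = synth.tr, nodesize = 10)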

QDA

LDA should behave much like the logistic regression above, since both give linear decision boundaries (a quick check is sketched after the plots below), so we fit QDA instead:

fit.qda <- qda(yc ~ ., data = synth.tr)
pred <- predict(fit.qda, synth.te)$class
table(pred,  synth.te$yc)
##     
## pred   0   1
##    0 454  56
##    1  46 444
mean(pred != synth.te$yc) # test error
## [1] 0.102
grid$pred.qda <- predict(fit.qda, newdata = grid)$class

ggplot(aes(x = xs, y = ys, color = yc), data = synth.tr) +
  geom_point() +
  geom_point(aes(color = pred.qda), data = grid, alpha = .1)

ggplot(aes(x = xs, y = ys, color = yc), data = synth.te) +
  geom_point() +
  geom_point(aes(color = pred.qda), data = grid, alpha = .1)
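
As a sanity check on the claim above, LDA (also in MASS) fits the same way; its test error should land close to the logistic regression's 0.114 (a sketch, output not shown):

# Sketch: linear discriminant analysis for comparison with the GLM
fit.lda <- lda(yc ~ ., data = synth.tr)
mean(predict(fit.lda, synth.te)$class != synth.te$yc)  # test error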

SVM

library(e1071)
fit.svm <- svm(yc ~ ., data = synth.tr, kernel = "radial")
pred <- predict(fit.svm, synth.te)
table(pred,  synth.te$yc)
##     
## pred   0   1
##    0 458  53
##    1  42 447
mean(pred != synth.te$yc) # test error
## [1] 0.095
grid$pred.svm <- predict(fit.svm, newdata = grid)

ggplot(aes(x = xs, y = ys, color = yc), data = synth.tr) +
  geom_point() +
  geom_point(aes(color = pred.svm), data = grid, alpha = .1)

We should really tune the cost and bandwidth parameters:

tune.out <- tune(svm, yc ~ ., data = synth.tr, kernel = "radial",
                 ranges = list(cost = c(1, 2, 3), gamma = 10^seq(-4, 2, by = 1)))
tune.out
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost gamma
##     2     1
## 
## - best performance: 0.124
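e1071 also provides a plot method for tune objects, which maps the cross-validated error over the parameter grid and shows how flat the optimum is:

plot(tune.out)  # CV error over the cost/gamma grid

Refit with the selected parameters:
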
fit.svm2 <- svm(yc ~ ., data = synth.tr, kernel = "radial",
                cost = 2, gamma = 1)

pred <- predict(fit.svm2, synth.te)
table(pred,  synth.te$yc)
##     
## pred   0   1
##    0 458  54
##    1  42 446
mean(pred != synth.te$yc) # test error
## [1] 0.096
grid$pred.svm2 <- predict(fit.svm2, newdata = grid)

ggplot(aes(x = xs, y = ys, color = yc), data = synth.tr) +
  geom_point() +
  geom_point(aes(color = pred.svm2), data = grid, alpha = .1)

Try out different values of the bandwidth parameter gamma:

fit.svm2 <- svm(yc ~ ., data = synth.tr, kernel = "radial",
                cost = 2, gamma = 10)

pred <- predict(fit.svm2, synth.te)
table(pred,  synth.te$yc)
##     
## pred   0   1
##    0 448  73
##    1  52 427
mean(pred != synth.te$yc) # test error
## [1] 0.125
grid$pred.svm2 <- predict(fit.svm2, newdata = grid)

ggplot(aes(x = xs, y = ys, color = yc), data = synth.tr) +
  geom_point() +
  geom_point(aes(color = pred.svm2), data = grid, alpha = .1)

fit.svm2 <- svm(yc ~ ., data = synth.tr, kernel = "radial",
                cost = 2, gamma = 0.01)

pred <- predict(fit.svm2, synth.te)
table(pred,  synth.te$yc)
##     
## pred   0   1
##    0 454  61
##    1  46 439
mean(pred != synth.te$yc) # test error
## [1] 0.107
grid$pred.svm2 <- predict(fit.svm2, newdata = grid)

ggplot(aes(x = xs, y = ys, color = yc), data = synth.tr) +
  geom_point() +
  geom_point(aes(color = pred.svm2), data = grid, alpha = .1)
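
Rather than refitting by hand, a compact scan over a grid of gamma values traces out the usual U-shaped test-error curve (a sketch; the particular grid is arbitrary):

# Sketch: test error across a grid of gamma values, cost fixed at 2
gammas <- 10^seq(-2, 1)
sapply(gammas, function(g) {
  fit <- svm(yc ~ ., data = synth.tr, kernel = "radial", cost = 2, gamma = g)
  mean(predict(fit, synth.te) != synth.te$yc)
})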

Other kernels: polynomial and sigmoid:

fit.svm.poly <- svm(yc ~ ., data = synth.tr, kernel = "polynomial")

pred <- predict(fit.svm.poly, synth.te)
table(pred,  synth.te$yc)
##     
## pred   0   1
##    0 421  29
##    1  79 471
mean(pred != synth.te$yc) # test error
## [1] 0.108
grid$pred.svm.poly <- predict(fit.svm.poly, newdata = grid)

ggplot(aes(x = xs, y = ys, color = yc), data = synth.tr) +
  geom_point() +
  geom_point(aes(color = pred.svm.poly), data = grid, alpha = .1)

fit.svm.sigmoid <- svm(yc ~ ., data = synth.tr, kernel = "sigmoid")

pred <- predict(fit.svm.sigmoid, synth.te)
table(pred,  synth.te$yc)
##     
## pred   0   1
##    0 427 108
##    1  73 392
mean(pred != synth.te$yc) # test error
## [1] 0.181
grid$pred.svm.sigmoid <- predict(fit.svm.sigmoid, newdata = grid)

ggplot(aes(x = xs, y = ys, color = yc), data = synth.tr) +
  geom_point() +
  geom_point(aes(color = pred.svm.sigmoid), data = grid, alpha = .1)
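
In summary, on this test set the GAM (error 0.089) and the radial-kernel SVM (0.095) come out ahead, QDA (0.102) and the random forest (0.103) sit in the middle, and the unpruned tree (0.155) and the sigmoid-kernel SVM (0.181) do worst.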