library(titanic)
library(caret)
library(lattice)
library(ggplot2)
library(gam)
library(car)
library(ROCR)
library(ISLR)
library(e1071)
library(data.table)
library(ggmosaic)
# Simulate two-class data whose true decision boundary |x1| = |x2| is non-linear
set.seed(1)
x1 = runif(500) - 0.5
x2 = runif(500) - 0.5
y = 1 * (x1^2 - x2^2 > 0)
dt <- data.table(y = as.factor(y), x1 = x1, x2 = x2)
ggplot(dt, aes(x = x1, y = x2, col = y)) + geom_point()
# Logistic regression with linear terms only
log.fit <- glm(y ~ x1 + x2, family = binomial)
summary(log.fit)
##
## Call:
## glm(formula = y ~ x1 + x2, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.179 -1.139 -1.112 1.206 1.257
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.087260 0.089579 -0.974 0.330
## x1 0.196199 0.316864 0.619 0.536
## x2 -0.002854 0.305712 -0.009 0.993
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 692.18 on 499 degrees of freedom
## Residual deviance: 691.79 on 497 degrees of freedom
## AIC: 697.79
##
## Number of Fisher Scoring iterations: 3
# Class predictions from the linear-term logistic fit: the implied boundary is
# linear, so it cannot match the true quadratic boundary
prob <- predict(log.fit, dt, type = "response")
log.pred <- ifelse(prob > 0.5, 1, 0)
ggplot(data.table(x1 = dt$x1, x2 = dt$x2, pred = as.factor(log.pred)), aes(x = x1, y = x2, col = pred)) + geom_point()
# Logistic regression with quadratic terms in x1 and x2
log.poly <- glm(y ~ poly(x1, 2) + poly(x2, 2), family = binomial)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(log.poly)
##
## Call:
## glm(formula = y ~ poly(x1, 2) + poly(x2, 2), family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.079e-03 -2.000e-08 -2.000e-08 2.000e-08 1.297e-03
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -94.48 2963.78 -0.032 0.975
## poly(x1, 2)1 3442.52 104411.28 0.033 0.974
## poly(x1, 2)2 30110.74 858421.66 0.035 0.972
## poly(x2, 2)1 162.82 26961.99 0.006 0.995
## poly(x2, 2)2 -31383.76 895267.48 -0.035 0.972
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6.9218e+02 on 499 degrees of freedom
## Residual deviance: 4.2881e-06 on 495 degrees of freedom
## AIC: 10
##
## Number of Fisher Scoring iterations: 25
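The convergence warnings and the enormous coefficient estimates and standard errors above are the classic symptoms of (near-)perfect separation: the quadratic terms split the two classes almost exactly, so maximum likelihood drives the fitted probabilities to 0 and 1. A quick check of this, assuming log.poly and y from the code above are still in the workspace:
# Under near-perfect separation the fitted probabilities collapse to (numerically) 0 and 1
range(fitted(log.poly))
table(actual = y, predicted = ifelse(fitted(log.poly) > 0.5, 1, 0))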
# The quadratic logistic fit recovers the non-linear boundary almost perfectly
prob <- predict(log.poly, dt, type = "response")
log.pred <- ifelse(prob > 0.5, 1, 0)
ggplot(data.table(x1 = dt$x1, x2 = dt$x2, pred = as.factor(log.pred)), aes(x = x1, y = x2, col = pred)) + geom_point()
# Support vector classifier (linear kernel): again limited to a linear boundary
svm.fit <- svm(as.factor(y) ~ x1 + x2, dt, kernel = "linear", cost = 1)
svm.pred <- predict(svm.fit, dt)
ggplot(data.table(x1 = dt$x1, x2 = dt$x2, pred = as.factor(svm.pred)), aes(x = x1, y = x2, col = pred)) + geom_point()
# SVM with the default radial kernel and gamma = 1 captures the quadratic boundary
svm.fit <- svm(as.factor(y) ~ x1 + x2, dt, gamma = 1)
svm.pred <- predict(svm.fit, dt)
ggplot(data.table(x1 = dt$x1, x2 = dt$x2, pred = as.factor(svm.pred)), aes(x = x1, y = x2, col = pred)) + geom_point()
We can see from these models that an SVM with a non-linear kernel is very good at finding non-linear boundaries, much like logistic regression with quadratic terms, whereas the linear logistic fit and the linear-kernel SVM cannot capture the quadratic boundary.
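To put numbers on that claim, one option is to compare training error rates side by side. The block below is a self-contained sketch rather than part of the original analysis (the helper err and the log.lin/log.quad/svm.lin/svm.rad names are illustrative); it refits the four classifiers on dt and reports their misclassification rates.
# Training misclassification rate of each classifier on the simulated data
err <- function(pred) mean(as.character(pred) != as.character(dt$y))
log.lin  <- glm(y ~ x1 + x2, data = dt, family = binomial)
log.quad <- glm(y ~ poly(x1, 2) + poly(x2, 2), data = dt, family = binomial)  # separation warnings expected
svm.lin  <- svm(y ~ x1 + x2, data = dt, kernel = "linear", cost = 1)
svm.rad  <- svm(y ~ x1 + x2, data = dt, kernel = "radial", gamma = 1)
c(logistic_linear = err(ifelse(predict(log.lin, dt, type = "response") > 0.5, 1, 0)),
  logistic_quadratic = err(ifelse(predict(log.quad, dt, type = "response") > 0.5, 1, 0)),
  svm_linear = err(predict(svm.lin, dt)),
  svm_radial = err(predict(svm.rad, dt)))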
# Auto data: classify cars as above/below the median mpg
gas.med = median(Auto$mpg)
new.var = ifelse(Auto$mpg > gas.med, 1, 0)
Auto$mpglevel = as.factor(new.var)
# Tune a linear-kernel SVM over a grid of cost values (10-fold CV)
set.seed(1)
tune.out = tune(svm, mpglevel ~ ., data = Auto, kernel = "linear",
    ranges = list(cost = c(0.01, 0.1, 1, 5, 10, 100)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.01025641
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-02 0.07653846 0.03617137
## 2 1e-01 0.04596154 0.03378238
## 3 1e+00 0.01025641 0.01792836
## 4 5e+00 0.02051282 0.02648194
## 5 1e+01 0.02051282 0.02648194
## 6 1e+02 0.03076923 0.03151981
# Tune a polynomial-kernel SVM over cost and degree
set.seed(1)
tune.out = tune(svm, mpglevel ~ ., data = Auto, kernel = "polynomial",
    ranges = list(cost = c(0.1, 1, 5, 10), degree = c(2, 3, 4)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 10 2
##
## - best performance: 0.5130128
##
## - Detailed performance results:
## cost degree error dispersion
## 1 0.1 2 0.5511538 0.04366593
## 2 1.0 2 0.5511538 0.04366593
## 3 5.0 2 0.5511538 0.04366593
## 4 10.0 2 0.5130128 0.08963366
## 5 0.1 3 0.5511538 0.04366593
## 6 1.0 3 0.5511538 0.04366593
## 7 5.0 3 0.5511538 0.04366593
## 8 10.0 3 0.5511538 0.04366593
## 9 0.1 4 0.5511538 0.04366593
## 10 1.0 4 0.5511538 0.04366593
## 11 5.0 4 0.5511538 0.04366593
## 12 10.0 4 0.5511538 0.04366593
# Tune a radial-kernel SVM over cost and gamma
set.seed(1)
tune.out = tune(svm, mpglevel ~ ., data = Auto, kernel = "radial",
    ranges = list(cost = c(0.1, 1, 5, 10), gamma = c(0.01, 0.1, 1, 5, 10, 100)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 10 0.01
##
## - best performance: 0.02557692
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 0.1 1e-02 0.08929487 0.04382379
## 2 1.0 1e-02 0.07403846 0.03522110
## 3 5.0 1e-02 0.04852564 0.03303346
## 4 10.0 1e-02 0.02557692 0.02093679
## 5 0.1 1e-01 0.07903846 0.03874545
## 6 1.0 1e-01 0.05371795 0.03525162
## 7 5.0 1e-01 0.02820513 0.03299190
## 8 10.0 1e-01 0.03076923 0.03375798
## 9 0.1 1e+00 0.55115385 0.04366593
## 10 1.0 1e+00 0.06384615 0.04375618
## 11 5.0 1e+00 0.05884615 0.04020934
## 12 10.0 1e+00 0.05884615 0.04020934
## 13 0.1 5e+00 0.55115385 0.04366593
## 14 1.0 5e+00 0.49493590 0.04724924
## 15 5.0 5e+00 0.48217949 0.05470903
## 16 10.0 5e+00 0.48217949 0.05470903
## 17 0.1 1e+01 0.55115385 0.04366593
## 18 1.0 1e+01 0.51794872 0.05063697
## 19 5.0 1e+01 0.51794872 0.04917316
## 20 10.0 1e+01 0.51794872 0.04917316
## 21 0.1 1e+02 0.55115385 0.04366593
## 22 1.0 1e+02 0.55115385 0.04366593
## 23 5.0 1e+02 0.55115385 0.04366593
## 24 10.0 1e+02 0.55115385 0.04366593
# Refit each kernel at its CV-chosen parameters
svm.linear = svm(mpglevel ~ ., data = Auto, kernel = "linear", cost = 1)
svm.poly = svm(mpglevel ~ ., data = Auto, kernel = "polynomial", cost = 10, degree = 2)
svm.radial = svm(mpglevel ~ ., data = Auto, kernel = "radial", cost = 10, gamma = 0.01)
# Plot the fitted decision regions of mpg against every other predictor
plotpairs = function(fit) {
    for (name in names(Auto)[!(names(Auto) %in% c("mpg", "mpglevel", "name"))]) {
        plot(fit, Auto, as.formula(paste("mpg~", name, sep = "")))
    }
}
plotpairs(svm.linear)
plotpairs(svm.radial)
plotpairs(svm.poly)
# OJ data: 800-observation training split, remainder held out as a test set
set.seed(1)
train = sample(dim(OJ)[1], 800)
OJ.train = OJ[train, ]
OJ.test = OJ[-train, ]
# Support vector classifier (linear kernel) with a small cost
svm.linear = svm(Purchase ~ ., kernel = "linear", data = OJ.train, cost = 0.01)
summary(svm.linear)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ.train, kernel = "linear", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 435
##
## ( 219 216 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
# Training confusion matrix for the cost = 0.01 linear SVM
train.pred = predict(svm.linear, OJ.train)
table(OJ.train$Purchase, train.pred)
## train.pred
## CH MM
## CH 420 65
## MM 75 240
# Test confusion matrix
test.pred = predict(svm.linear, OJ.test)
table(OJ.test$Purchase, test.pred)
## test.pred
## CH MM
## CH 153 15
## MM 33 69
# Tune the linear-kernel cost over a log-spaced grid
set.seed(1554)
tune.out = tune(svm, Purchase ~ ., data = OJ.train, kernel = "linear",
    ranges = list(cost = 10^seq(-2, 1, by = 0.25)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.3162278
##
## - best performance: 0.17125
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01000000 0.17750 0.06635343
## 2 0.01778279 0.17750 0.05916080
## 3 0.03162278 0.17500 0.06095308
## 4 0.05623413 0.17375 0.06755913
## 5 0.10000000 0.17625 0.06755913
## 6 0.17782794 0.17625 0.06573569
## 7 0.31622777 0.17125 0.06483151
## 8 0.56234133 0.17375 0.06573569
## 9 1.00000000 0.17250 0.06258328
## 10 1.77827941 0.17500 0.06997023
## 11 3.16227766 0.17250 0.06661456
## 12 5.62341325 0.17625 0.07155272
## 13 10.00000000 0.17875 0.07072295
# Refit the linear SVM at the CV-chosen cost and recompute both confusion matrices
svm.linear = svm(Purchase ~ ., kernel = "linear", data = OJ.train, cost = tune.out$best.parameters$cost)
train.pred = predict(svm.linear, OJ.train)
table(OJ.train$Purchase, train.pred)
## train.pred
## CH MM
## CH 423 62
## MM 71 244
test.pred = predict(svm.linear, OJ.test)
table(OJ.test$Purchase, test.pred)
## test.pred
## CH MM
## CH 155 13
## MM 29 73
# Radial-kernel SVM with default cost and gamma
set.seed(1)
svm.radial = svm(Purchase ~ ., data = OJ.train, kernel = "radial")
summary(svm.radial)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ.train, kernel = "radial")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 373
##
## ( 188 185 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
train.pred = predict(svm.radial, OJ.train)
table(OJ.train$Purchase, train.pred)
## train.pred
## CH MM
## CH 441 44
## MM 77 238
test.pred = predict(svm.radial, OJ.test)
table(OJ.test$Purchase, test.pred)
## test.pred
## CH MM
## CH 151 17
## MM 33 69
# Tune the radial-kernel cost over the same grid
set.seed(1)
tune.out = tune(svm, Purchase ~ ., data = OJ.train, kernel = "radial",
    ranges = list(cost = 10^seq(-2, 1, by = 0.25)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.5623413
##
## - best performance: 0.16875
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01000000 0.39375 0.04007372
## 2 0.01778279 0.39375 0.04007372
## 3 0.03162278 0.35750 0.05927806
## 4 0.05623413 0.19500 0.02443813
## 5 0.10000000 0.18625 0.02853482
## 6 0.17782794 0.18250 0.03291403
## 7 0.31622777 0.17875 0.03230175
## 8 0.56234133 0.16875 0.02651650
## 9 1.00000000 0.17125 0.02128673
## 10 1.77827941 0.17625 0.02079162
## 11 3.16227766 0.17750 0.02266912
## 12 5.62341325 0.18000 0.02220485
## 13 10.00000000 0.18625 0.02853482
# Refit the radial SVM at the CV-chosen cost
svm.radial = svm(Purchase ~ ., data = OJ.train, kernel = "radial", cost = tune.out$best.parameters$cost)
train.pred = predict(svm.radial, OJ.train)
table(OJ.train$Purchase, train.pred)
## train.pred
## CH MM
## CH 437 48
## MM 71 244
Overall, the radial kernel seems to give the best results on both the training and test data.
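One way to check that claim directly is to compare test error rates for the tuned fits. A minimal sketch, assuming svm.linear and svm.radial above are the refits at their CV-chosen costs and OJ.test is still in the workspace (the helper name test.err is illustrative):
# Test misclassification rate for the tuned linear and radial SVMs
test.err <- function(fit) mean(predict(fit, OJ.test) != OJ.test$Purchase)
c(linear = test.err(svm.linear), radial = test.err(svm.radial))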