# Sketch the hyperplane 1 + 3*X1 - X2 = 0 (blue), i.e. the line X2 = 1 + 3*X1
x1 = -10:10
x2 = 1 + 3 * x1
plot(x1, x2, type = "l", col = "blue")
text(c(3), c(-5), "Greater than 0", col = "blue")
text(c(-1), c(15), "Less than 0", col = "blue")
lines(x1, 1 - x1/2, col = "red")
text(c(3), c(-10), "Less than 0", col = "red")
text(c(-1), c(20), "Greater than 0", col = "red")
legend("topleft",legend=c("1 + 3X_1 - X_2 = 0","−2+X_1+2X_2=0"),pch=19,col=c("blue", "red"))
# Sketch the circle (X1 + 1)^2 + (X2 - 2)^2 = 4: centre (-1, 2), radius 2
radius = 2
plot(NA, NA, type = "n", xlim = c(-4, 2), ylim = c(-1, 5), asp = 1, xlab = "X1", ylab = "X2")
symbols(c(-1), c(2), circles = c(radius), add = TRUE, inches = FALSE)
# Redraw the circle and label the regions where (X1 + 1)^2 + (X2 - 2)^2 is less than or greater than 4
radius = 2
plot(NA, NA, type = "n", xlim = c(-4, 2), ylim = c(-1, 5), asp = 1, xlab = "X1", ylab = "X2")
symbols(c(-1), c(2), circles = c(radius), add = TRUE, inches = FALSE)
text(c(-1), c(2), "< 4")
text(c(-4), c(2), "> 4")
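Evaluating \((X_1 + 1)^2 + (X_2 - 2)^2\) at the two labelled points confirms the annotations:
(-1 + 1)^2 + (2 - 2)^2  # = 0 < 4: the centre lies inside the circle
(-4 + 1)^2 + (2 - 2)^2  # = 9 > 4: (-4, 2) lies outside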
# Plot (0,0), (-1,1), (2,2), (3,8), coloured blue outside the circle and red inside
plot(c(0, -1, 2, 3), c(0, 1, 2, 8), col = c("blue", "red", "blue", "blue"), type = "p", asp = 1, xlab = "X1", ylab = "X2", ylim = c(-2, 10))
symbols(c(-1), c(2), circles = c(2), add = TRUE, inches = FALSE)
text(c(0), c(-1), "(0,0)")
text(c(-1), c(2), "(-1,1)")
text(c(2), c(3), "(2,2)")
text(c(3), c(9), "(3,8)")
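The colours follow from the sign of \((X_1 + 1)^2 + (X_2 - 2)^2 - 4\); a minimal check (px1 and px2 are throwaway names for the four points):
px1 = c(0, -1, 2, 3)
px2 = c(0, 1, 2, 8)
(px1 + 1)^2 + (px2 - 2)^2  # 5 1 9 52: only (-1, 1) falls inside the circle, hence red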
\(\begin{array}{cccc}
\hline
\mbox{Obs.} & X_1 & X_2 & Y \cr
\hline
1 & 3 & 4 & \mbox{Red} \cr
2 & 2 & 2 & \mbox{Red} \cr
3 & 4 & 4 & \mbox{Red} \cr
4 & 1 & 4 & \mbox{Red} \cr
5 & 2 & 1 & \mbox{Blue} \cr
6 & 4 & 3 & \mbox{Blue} \cr
7 & 4 & 1 & \mbox{Blue} \cr
\hline
\end{array}\)
Sketch the observations.
x1 = c(3, 2, 4, 1, 2, 4, 4)
x2 = c(4, 2, 4, 4, 1, 3, 1)
plot(x1, x2, col = c("red", "red", "red", "red", "blue", "blue", "blue"), pch=20, ylim = c(0, 5), xlim=c(0, 5), asp=1)
The maximal margin hyperplane lies midway between the two classes' nearest points, giving \(X_1 - X_2 - 0.5 = 0\):
plot(x1, x2, col = c("red", "red", "red", "red", "blue", "blue", "blue"), xlim = c(0, 5), ylim = c(0, 5))
abline(-0.5, 1)
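To confirm this hyperplane separates the classes, evaluate \(X_1 - X_2 - 0.5\) at all seven observations; it is negative for the four red points and positive for the three blue ones:
x1 - x2 - 0.5  # -1.5 -0.5 -0.5 -3.5  0.5  0.5  2.5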
plot(x1, x2, col = c("red", "red", "red", "red", "blue", "blue", "blue"), xlim = c(0, 5), ylim = c(0, 5))
abline(-0.5, 1)
abline(-1, 1, lty = 2)  # margin boundary through the blue support vectors (2,1) and (4,3)
abline(0, 1, lty = 2)   # margin boundary through the red support vectors (2,2) and (4,4)
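The margin is the perpendicular distance from the hyperplane to either dashed line:
0.5 / sqrt(1^2 + (-1)^2)  # about 0.354, the distance between X1 - X2 = 0 and X1 - X2 - 0.5 = 0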
plot(x1, x2, col = c("red", "red", "red", "red", "blue", "blue", "blue"), xlim = c(0, 5), ylim = c(0, 5))
abline(-0.2, 1)  # separates the two classes, but is not the maximal margin hyperplane
plot(x1, x2, col = c("red", "red", "red", "red", "blue", "blue", "blue"), xlim = c(0, 5), ylim = c(0, 5))
points(c(3), c(1), col = c("red"))  # an added red point that makes the classes inseparable by any hyperplane
set.seed(11)
x = rnorm(100)
y = 4 * x^2 + 6 + rnorm(100)
train = sample(100, 50)
y[train] = y[train] + 4    # shift one half up and the other down so the two
y[-train] = y[-train] - 4  # classes are separated by a quadratic curve
plot(x[train], y[train], pch="o", col="red", ylim=c(-4, 20), xlab="X", ylab="Y")
points(x[-train], y[-train], pch="o", col="blue")
set.seed(11)
library(e1071)
z = rep(0, 100)
z[train] = 1
z = as.factor(z)
data = data.frame(x = x, y = y, z = z)
split = sample(100, 50)
data.test = data[split, ]
data.train = data[-split, ]
svm.linear = svm(z ~ ., data = data.train, kernel = "linear", cost = 10)
plot(svm.linear, data.train)
pred = predict(svm.linear, data.train)
table(predict=pred, truth=data.train$z)
##        truth
## predict  0  1
##       0 20  0
##       1  2 28
pred = predict(svm.linear, data.test)
table(predict=pred, truth=data.test$z)
##        truth
## predict  0  1
##       0 22  0
##       1  6 22
svm.poly = svm(z ~ ., data = data.train, kernel = "polynomial", cost = 10)
plot(svm.poly, data.train)
pred = predict(svm.poly, data.train)
table(predict=pred, truth=data.train$z)
##        truth
## predict  0  1
##       0 20  0
##       1  2 28
pred = predict(svm.poly, data.test)
table(predict=pred, truth=data.test$z)
##        truth
## predict  0  1
##       0 21  0
##       1  7 22
svm.radial = svm(z ~ ., data = data.train, kernel = "radial",gamma =2, cost = 10)
plot(svm.radial, data.train)
pred = predict(svm.radial, data.train)
table(predict=pred, truth=data.train$z)
##        truth
## predict  0  1
##       0 22  0
##       1  0 28
pred = predict(svm.radial, data.test)
table(predict=pred, truth=data.test$z)
##        truth
## predict  0  1
##       0 26  0
##       1  2 22
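Recomputing the three test error rates from the fits above makes the comparison explicit; with this seed the radial kernel clearly wins:
mean(predict(svm.linear, data.test) != data.test$z)  # 6/50 = 0.12, from the table above
mean(predict(svm.poly, data.test) != data.test$z)    # 7/50 = 0.14
mean(predict(svm.radial, data.test) != data.test$z)  # 2/50 = 0.04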
set.seed(753)
x1 = runif(500) - 0.5
x2 = runif(500) - 0.5
y = 1 * (x1^2 - x2^2 > 0)  # class 1 where |x1| > |x2|: a non-linear, X-shaped boundary
plot(x1[y == 0], x2[y == 0], col = "red", xlab = "X1", ylab = "X2", pch = "o")
points(x1[y == 1], x2[y == 1], col = "blue", pch = "o")
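The true boundary is \(x_1^2 = x_2^2\), i.e. the two diagonals; overlaying them (an addition to the plot above, not part of the original solution) makes the class structure visible:
abline(0, 1, lty = 2)   # x2 = x1
abline(0, -1, lty = 2)  # x2 = -x1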
logit.fit = glm(y ~ x1 + x2, family = binomial)
summary(logit.fit)
##
## Call:
## glm(formula = y ~ x1 + x2, family = binomial)
##
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.324  -1.230   1.047   1.117   1.213  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  0.13530    0.08992   1.505    0.132
## x1           0.36640    0.29926   1.224    0.221
## x2          -0.10610    0.30989  -0.342    0.732
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 690.83 on 499 degrees of freedom
## Residual deviance: 689.21 on 497 degrees of freedom
## AIC: 695.21
##
## Number of Fisher Scoring iterations: 3
data = data.frame(x1 = x1, x2 = x2, y = y)
logit.prob= predict(logit.fit, data, type="response")
logit.pred = ifelse(logit.prob > 0.5, 1, 0)
data.1 = data[logit.pred == 1, ]
data.0 = data[logit.pred == 0, ]
plot(data.1$x1, data.1$x2, col = "blue", xlab = "X1", ylab = "X2", pch = "o")
points(data.0$x1, data.0$x2, col = "red", pch = "o")
logit2.fit = glm(y ~ poly(x1, 2) + poly(x2, 2)+I(x1 * x2), family = "binomial")
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(logit2.fit)
##
## Call:
## glm(formula = y ~ poly(x1, 2) + poly(x2, 2) + I(x1 * x2), family = "binomial")
##
## Deviance Residuals: 
##   Min     1Q Median     3Q    Max  
## -8.49   0.00   0.00   0.00   8.49  
## 
## Coefficients:
##                Estimate Std. Error    z value Pr(>|z|)    
## (Intercept)   2.743e+14  3.001e+06   91406484   <2e-16 ***
## poly(x1, 2)1  4.194e+15  6.719e+07   62416779   <2e-16 ***
## poly(x1, 2)2  3.287e+16  6.725e+07  488851589   <2e-16 ***
## poly(x2, 2)1  2.774e+14  6.713e+07    4132876   <2e-16 ***
## poly(x2, 2)2 -3.788e+16  6.721e+07 -563529489   <2e-16 ***
## I(x1 * x2)    5.757e+14  3.406e+07   16901037   <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 690.83 on 499 degrees of freedom
## Residual deviance: 2739.32 on 494 degrees of freedom
## AIC: 2751.3
##
## Number of Fisher Scoring iterations: 25
data = data.frame(x1 = x1, x2 = x2, y = y)
logit.prob= predict(logit2.fit, data, type="response")
logit.pred = ifelse(logit.prob > 0.5, 1, 0)
data.1 = data[logit.pred == 1, ]
data.0 = data[logit.pred == 0, ]
plot(data.1$x1, data.1$x2, col = "blue", xlab = "X1", ylab = "X2", pch = "o")
points(data.0$x1, data.0$x2, col = "red", pch = "o")
data$y = as.factor(data$y)
svm.fit = svm(y ~ x1 + x2, data, kernel = "linear", cost = 0.1)
svm.pred = predict(svm.fit, data)
data.svm.1 = data[svm.pred == 1, ]
data.svm.0 = data[svm.pred == 0, ]
plot(data.svm.1$x1, data.svm.1$x2, col = "blue", xlab = "X1", ylab = "X2", pch = "+")
points(data.svm.0$x1, data.svm.0$x2, col = "red", pch = 4)
data$y = as.factor(data$y)
svm.fit = svm(y ~ x1 + x2, data, kernel = "radial", gamma =2, cost = 0.1)
svm.pred = predict(svm.fit, data)
data.svm.1 = data[svm.pred == 1, ]
data.svm.0 = data[svm.pred == 0, ]
plot(data.svm.1$x1, data.svm.1$x2, col = "blue", xlab = "X1", ylab = "X2", pch = "+")
points(data.svm.0$x1, data.svm.0$x2, col = "red", pch = 4)
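One way to quantify how much better the radial kernel tracks the non-linear boundary is its agreement with the true labels (svm.pred from the radial fit above; the exact value depends on the seed):
mean(svm.pred == data$y)  # fraction of the 500 points classified correctly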
set.seed(1)
x=matrix(rnorm(500*2), ncol=2)
y=c(rep(-1,250), rep(1, 250))
x[y==1, ] = x[y==1, ] +3
plot(x, col = (y + 5) / 2, pch = 19)  # maps y = -1 to colour 2 (red) and y = 1 to colour 3 (green)
set.seed(1)
data=data.frame(x=x, y=as.factor(y))
tune.out = tune(svm, y ~ ., data = data, kernel = "linear", ranges = list(cost = c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000, 10000)))
report = data.frame(cost = tune.out$performances$cost, misclass = tune.out$performances$error)
report
## cost misclass
## 1 1e-03 0.016
## 2 1e-02 0.010
## 3 1e-01 0.012
## 4 1e+00 0.010
## 5 5e+00 0.008
## 6 1e+01 0.012
## 7 1e+02 0.014
## 8 1e+03 0.014
## 9 1e+04 0.014
bestmod = tune.out$best.model
summary(bestmod)
##
## Call:
## best.tune(method = svm, train.x = y ~ ., data = data, ranges = list(cost = c(0.001,
## 0.01, 0.1, 1, 5, 10, 100, 1000, 10000)), kernel = "linear")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 5
##
## Number of Support Vectors: 21
##
## ( 11 10 )
##
##
## Number of Classes: 2
##
## Levels:
## -1 1
set.seed(58)
xtest=matrix(rnorm(500*2), ncol=2)
ytest = sample(c(-1, 1), 500, replace = TRUE)
xtest[ytest==1, ] = xtest[ytest==1, ] +3
testdat = data.frame(x=xtest, y=as.factor(ytest))
costs = c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000, 10000)
test.err = rep(NA, length(costs))
for (i in 1:length(costs)) {
    svm.fit = svm(y ~ ., data = data, kernel = "linear", cost = costs[i])
    pred = predict(svm.fit, testdat)
    test.err[i] = sum(pred != testdat$y)
}
data.frame(cost = costs, misclass = test.err)
## cost misclass
## 1 1e-03 6
## 2 1e-02 6
## 3 1e-01 6
## 4 1e+00 7
## 5 5e+00 8
## 6 1e+01 7
## 7 1e+02 6
## 8 1e+03 6
## 9 1e+04 6
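Expressed as error rates out of the 500 test observations, every cost stays below 2%, so the choice of cost barely matters on this well-separated data:
data.frame(cost = costs, error.rate = test.err / nrow(testdat))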
library(ISLR)
bin = ifelse(Auto$mpg > median(Auto$mpg), 1, 0)  # 1 if gas mileage is above the median
Auto$mpgfactor = as.factor(bin)
set.seed(1)
tune.out = tune(svm, mpgfactor ~ ., data = Auto, kernel = "linear", ranges = list(cost = c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.01025641
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-03 0.09442308 0.04519425
## 2 1e-02 0.07653846 0.03617137
## 3 1e-01 0.04596154 0.03378238
## 4 1e+00 0.01025641 0.01792836
## 5 5e+00 0.02051282 0.02648194
## 6 1e+01 0.02051282 0.02648194
## 7 1e+02 0.03076923 0.03151981
## 8 1e+03 0.03076923 0.03151981
set.seed(1)
tune.out = tune(svm, mpgfactor ~ ., data = Auto, kernel = "polynomial", ranges = list(cost = c(0.001, 0.01, 0.1, 1, 5, 10, 100), degree = c(2, 3, 4, 5)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 100 2
##
## - best performance: 0.3013462
##
## - Detailed performance results:
## cost degree error dispersion
## 1 1e-03 2 0.5511538 0.04366593
## 2 1e-02 2 0.5511538 0.04366593
## 3 1e-01 2 0.5511538 0.04366593
## 4 1e+00 2 0.5511538 0.04366593
## 5 5e+00 2 0.5511538 0.04366593
## 6 1e+01 2 0.5130128 0.08963366
## 7 1e+02 2 0.3013462 0.09961961
## 8 1e-03 3 0.5511538 0.04366593
## 9 1e-02 3 0.5511538 0.04366593
## 10 1e-01 3 0.5511538 0.04366593
## 11 1e+00 3 0.5511538 0.04366593
## 12 5e+00 3 0.5511538 0.04366593
## 13 1e+01 3 0.5511538 0.04366593
## 14 1e+02 3 0.3446154 0.09821588
## 15 1e-03 4 0.5511538 0.04366593
## 16 1e-02 4 0.5511538 0.04366593
## 17 1e-01 4 0.5511538 0.04366593
## 18 1e+00 4 0.5511538 0.04366593
## 19 5e+00 4 0.5511538 0.04366593
## 20 1e+01 4 0.5511538 0.04366593
## 21 1e+02 4 0.5511538 0.04366593
## 22 1e-03 5 0.5511538 0.04366593
## 23 1e-02 5 0.5511538 0.04366593
## 24 1e-01 5 0.5511538 0.04366593
## 25 1e+00 5 0.5511538 0.04366593
## 26 5e+00 5 0.5511538 0.04366593
## 27 1e+01 5 0.5511538 0.04366593
## 28 1e+02 5 0.5511538 0.04366593
set.seed(1)
tune.out = tune(svm, mpgfactor ~ ., data = Auto, kernel = "radial", ranges = list(cost = c(0.001, 0.01, 0.1, 1, 5, 10, 100), gamma = c(0.001, 0.01, 0.1, 1, 5, 10, 100)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 100 0.01
##
## - best performance: 0.01282051
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 1e-03 1e-03 0.55115385 0.04366593
## 2 1e-02 1e-03 0.55115385 0.04366593
## 3 1e-01 1e-03 0.50794872 0.07765335
## 4 1e+00 1e-03 0.09185897 0.04376958
## 5 5e+00 1e-03 0.07653846 0.03617137
## 6 1e+01 1e-03 0.07403846 0.03522110
## 7 1e+02 1e-03 0.02814103 0.01893035
## 8 1e-03 1e-02 0.55115385 0.04366593
## 9 1e-02 1e-02 0.55115385 0.04366593
## 10 1e-01 1e-02 0.08929487 0.04382379
## 11 1e+00 1e-02 0.07403846 0.03522110
## 12 5e+00 1e-02 0.04852564 0.03303346
## 13 1e+01 1e-02 0.02557692 0.02093679
## 14 1e+02 1e-02 0.01282051 0.01813094
## 15 1e-03 1e-01 0.55115385 0.04366593
## 16 1e-02 1e-01 0.21711538 0.09865227
## 17 1e-01 1e-01 0.07903846 0.03874545
## 18 1e+00 1e-01 0.05371795 0.03525162
## 19 5e+00 1e-01 0.02820513 0.03299190
## 20 1e+01 1e-01 0.03076923 0.03375798
## 21 1e+02 1e-01 0.03583333 0.02759051
## 22 1e-03 1e+00 0.55115385 0.04366593
## 23 1e-02 1e+00 0.55115385 0.04366593
## 24 1e-01 1e+00 0.55115385 0.04366593
## 25 1e+00 1e+00 0.06384615 0.04375618
## 26 5e+00 1e+00 0.05884615 0.04020934
## 27 1e+01 1e+00 0.05884615 0.04020934
## 28 1e+02 1e+00 0.05884615 0.04020934
## 29 1e-03 5e+00 0.55115385 0.04366593
## 30 1e-02 5e+00 0.55115385 0.04366593
## 31 1e-01 5e+00 0.55115385 0.04366593
## 32 1e+00 5e+00 0.49493590 0.04724924
## 33 5e+00 5e+00 0.48217949 0.05470903
## 34 1e+01 5e+00 0.48217949 0.05470903
## 35 1e+02 5e+00 0.48217949 0.05470903
## 36 1e-03 1e+01 0.55115385 0.04366593
## 37 1e-02 1e+01 0.55115385 0.04366593
## 38 1e-01 1e+01 0.55115385 0.04366593
## 39 1e+00 1e+01 0.51794872 0.05063697
## 40 5e+00 1e+01 0.51794872 0.04917316
## 41 1e+01 1e+01 0.51794872 0.04917316
## 42 1e+02 1e+01 0.51794872 0.04917316
## 43 1e-03 1e+02 0.55115385 0.04366593
## 44 1e-02 1e+02 0.55115385 0.04366593
## 45 1e-01 1e+02 0.55115385 0.04366593
## 46 1e+00 1e+02 0.55115385 0.04366593
## 47 5e+00 1e+02 0.55115385 0.04366593
## 48 1e+01 1e+02 0.55115385 0.04366593
## 49 1e+02 1e+02 0.55115385 0.04366593
svm.linear = svm(mpgfactor ~ ., data = Auto, kernel = "linear",cost = 1, decision.values=T)
svm.poly = svm(mpgfactor ~ ., data = Auto, kernel = "polynomial", cost = 100, degree = 2, decision.values=T)
svm.radial = svm(mpgfactor ~ ., data = Auto, kernel = "radial", cost = 100, gamma = 0.01, decision.values=T)
# Plot the fitted SVM against mpg and each remaining predictor
plotpairs = function(fit) {
    for (name in names(Auto)[!(names(Auto) %in% c("mpg", "mpgfactor", "name"))]) {
        plot(fit, Auto, as.formula(paste("mpg~", name, sep = "")))
    }
}
plotpairs(svm.linear)
plotpairs(svm.poly)
plotpairs(svm.radial)
library(ISLR)
set.seed(1)
train = sample(nrow(OJ), 800)
oj.train = OJ[train, ]
oj.test = OJ[-train, ]
svm.linear = svm(Purchase~., data=oj.train, kernel="linear", cost = 0.01)
summary(svm.linear)
##
## Call:
## svm(formula = Purchase ~ ., data = oj.train, kernel = "linear", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 435
##
## ( 219 216 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
train.pred = predict(svm.linear, oj.train)
table(oj.train$Purchase, train.pred)
##     train.pred
##      CH  MM
##   CH 420  65
##   MM  75 240
(75 + 65) / (420 + 240 + 75 + 65)
## [1] 0.175
test.pred = predict(svm.linear, oj.test)
table(oj.test$Purchase, test.pred)
##     test.pred
##      CH  MM
##   CH 153  15
##   MM  33  69
(33 + 15) / (153 + 69 + 15 + 33)
## [1] 0.1777778
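The same rates can be computed directly with mean(), without reading counts off the tables:
mean(predict(svm.linear, oj.train) != oj.train$Purchase)  # 140/800 = 0.175
mean(predict(svm.linear, oj.test) != oj.test$Purchase)    # 48/270, about 0.178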
set.seed(1)
tune.out = tune(svm, Purchase ~ ., data = oj.train, kernel = "linear", ranges = list(cost = c(0.01, 0.05, 0.1, 0.5, 1, 5, 10)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.5
##
## - best performance: 0.16875
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.17625 0.02853482
## 2 0.05 0.17625 0.02853482
## 3 0.10 0.17250 0.03162278
## 4 0.50 0.16875 0.02651650
## 5 1.00 0.17500 0.02946278
## 6 5.00 0.17250 0.03162278
## 7 10.00 0.17375 0.03197764
svm.linear = svm(Purchase ~ ., kernel = "linear", data = oj.train, cost = tune.out$best.parameters$cost)
train.pred = predict(svm.linear, oj.train)
table(oj.train$Purchase, train.pred)
##     train.pred
##      CH  MM
##   CH 424  61
##   MM  71 244
(71 + 61) / (424 + 244 + 61 + 71)
## [1] 0.165
test.pred = predict(svm.linear, oj.test)
table(oj.test$Purchase, test.pred)
##     test.pred
##      CH  MM
##   CH 155  13
##   MM  29  73
(29+13)/(155+73+13+29)
## [1] 0.1555556
set.seed(1)
tune.out = tune(svm, Purchase ~ ., data = oj.train, kernel = "radial", ranges = list(cost = c(0.01, 0.05, 0.1, 0.5, 1, 5, 10)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.5
##
## - best performance: 0.1675
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.39375 0.04007372
## 2 0.05 0.20250 0.03374743
## 3 0.10 0.18625 0.02853482
## 4 0.50 0.16750 0.02443813
## 5 1.00 0.17125 0.02128673
## 6 5.00 0.18000 0.02220485
## 7 10.00 0.18625 0.02853482
svm.radial = svm(Purchase ~ ., kernel = "radial", data = oj.train, cost = tune.out$best.parameters$cost)
train.pred = predict(svm.radial, oj.train)
table(oj.train$Purchase, train.pred)
##     train.pred
##      CH  MM
##   CH 438  47
##   MM  71 244
(47+71)/(438+71+47+244)
## [1] 0.1475
test.pred = predict(svm.radial, oj.test)
table(oj.test$Purchase, test.pred)
##     test.pred
##      CH  MM
##   CH 150  18
##   MM  30  72
(18+30)/(150+18+30+72)
## [1] 0.1777778
set.seed(1)
tune.out = tune(svm, Purchase ~ ., data = oj.train, kernel = "polynomial", ranges = list(cost = c(0.01, 0.05, 0.1, 0.5, 1, 5, 10)), degree = 2)
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 10
##
## - best performance: 0.18125
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.39125 0.04210189
## 2 0.05 0.34875 0.04348132
## 3 0.10 0.32125 0.05001736
## 4 0.50 0.20625 0.04050463
## 5 1.00 0.20250 0.04116363
## 6 5.00 0.18250 0.03496029
## 7 10.00 0.18125 0.02779513
svm.poly = svm(Purchase ~ ., kernel = "polynomial", degree = 2, data = oj.train, cost = tune.out$best.parameters$cost)
train.pred = predict(svm.poly, oj.train)
table(oj.train$Purchase, train.pred)
##     train.pred
##      CH  MM
##   CH 446  39
##   MM  75 240
(39+75)/(446+39+75+240)
## [1] 0.1425
test.pred = predict(svm.poly, oj.test)
table(oj.test$Purchase, test.pred)
##     test.pred
##      CH  MM
##   CH 155  13
##   MM  42  60
(13+42)/(155+13+42+60)
## [1] 0.2037037
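Pulling the three tuned fits together via their test predictions: the tuned linear kernel does best here, the radial kernel is close behind, and the degree-2 polynomial does worst:
mean(predict(svm.linear, oj.test) != oj.test$Purchase)  # 42/270, about 0.156
mean(predict(svm.radial, oj.test) != oj.test$Purchase)  # 48/270, about 0.178
mean(predict(svm.poly, oj.test) != oj.test$Purchase)    # 55/270, about 0.204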