set.seed(30)
# Simulate 100 points around the quadratic curve x2 = x1^2 + 3
x1 <- rnorm(100, mean = 0, sd = 1)
x2 <- x1^2 + rnorm(100, mean = 3, sd = 1)
# Grid along the true boundary (kept for reference)
Seqx <- seq(from = min(x1), to = max(x1), length.out = 100)
Seqy <- Seqx^2 + 3
# Label each point by its own position relative to the curve
# (comparing x2 to Seqy would pair points with the wrong grid values)
fc <- rep("Good", 100)
fc[x2 < x1^2 + 3] <- "Bad"
fc <- as.factor(fc)
We fit a support vector classifier with a degree-3 polynomial kernel and plot its decision boundary:
library(e1071)
Dat <- data.frame(x1, x2, fc)
svmfit <- svm(fc ~ ., data = Dat, kernel = "polynomial", degree = 3,
              cost = 0.1, scale = FALSE)
plot(svmfit, Dat)
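As a quick sanity check (not part of the original output), the training confusion table for this fit can be printed; a minimal sketch:

table(Pred = predict(svmfit, Dat), Truth = Dat$fc)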
# Simulate 500 points; the class is the sign of x1^2 - x2^2, a non-linear boundary
x1 <- runif(500) - 0.5
x2 <- runif(500) - 0.5
y <- 1 * (x1^2 - x2^2 > 0)
yf <- as.factor(y)
Data.2 <- data.frame(x1, x2, y, yf)
plot(x1, x2, col = yf)
Logit.model <- glm(formula = y~. -yf, data = Data.2, family = 'binomial')
Data.3 <- Data.2[, -4]
library(caret)
## Loading required package: lattice
i.Data.3 <- createDataPartition(Data.3$y, p = 0.8, list = FALSE)
Train.Data.3 <- Data.3[i.Data.3,]
Test.Data.3 <- Data.3[-i.Data.3,]
Logit.model <- glm(formula = y~. , data = Train.Data.3, family = 'binomial')
pred.logit <- predict(Logit.model, type = 'response')
pred.logit.Tb <- rep(0,400)
pred.logit.Tb[pred.logit > 0.5] <- 1
Tb <- table(Pred = pred.logit.Tb, Truth = Train.Data.3$y)
Tb
##     Truth
## Pred   0   1
##    0  63  18
##    1 124 195
(Tb[1,1] + Tb[2,2])/sum(Tb)
## [1] 0.645
plot(Train.Data.3$x1,Train.Data.3$x2, col = as.factor(pred.logit.Tb))
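A quick held-out check (our own addition; test.prob is a name we introduce) scores the same linear logit on the 100 test observations:

test.prob <- predict(Logit.model, newdata = Test.Data.3, type = 'response')
mean((test.prob > 0.5) == Test.Data.3$y)  # test accuracy of the linear logit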
Logit.model <- glm(formula = y~ I(x1^2) + I(x2^2), data = Train.Data.3, family = 'binomial')
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
pred.logit <- predict(Logit.model, type = 'response')
pred.logit.Tb <- rep(0,400)
pred.logit.Tb[pred.logit > 0.5] <- 1
Tb <- table(Pred = pred.logit.Tb, Truth = Train.Data.3$y)
Tb
##     Truth
## Pred   0   1
##    0 187   0
##    1   0 213
With the squared terms, the model separates the training data perfectly:
(Tb[1,1] + Tb[2,2])/sum(Tb)
## [1] 1
Plotting the fitted classes reproduces the true non-linear boundary:
plot(Train.Data.3$x1,Train.Data.3$x2, col = as.factor(pred.logit.Tb))
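Since the true boundary x1^2 = x2^2 is the pair of lines x2 = x1 and x2 = -x1, they can be overlaid on the plot above for reference (a sketch, not in the original):

abline(a = 0, b = 1, lty = 2)
abline(a = 0, b = -1, lty = 2)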
Train.Data.3$y <- as.factor(Train.Data.3$y)
tune.out <- tune(svm, y ~ ., data = Train.Data.3,
                 kernel = "radial",
                 ranges = list(cost = c(0.1, 1, 10, 100, 1000),
                               gamma = c(0.5, 1, 2, 3, 4)))
pred.tune.out <- predict(tune.out$best.model,Train.Data.3)
plot(Train.Data.3$x1,Train.Data.3$x2, col = pred.tune.out)
The training confusion matrix for the tuned radial SVM:
table(Pred = pred.tune.out, Truth = Train.Data.3$y)
##     Truth
## Pred   0   1
##    0 185   0
##    1   2 213
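The tuned radial model can also be scored on the held-out data (a sketch, not in the original output; pred.test.svm is our own name):

pred.test.svm <- predict(tune.out$best.model, Test.Data.3)
table(Pred = pred.test.svm, Truth = Test.Data.3$y)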
tune.out <- tune(svm, y ~ ., data = Train.Data.3,
                 kernel = "polynomial",
                 ranges = list(cost = c(0.1, 1, 10, 100, 1000),
                               gamma = c(0.5, 1, 2, 3, 4)))
pred.tune.out <- predict(tune.out$best.model,Train.Data.3)
plot(Train.Data.3$x1,Train.Data.3$x2, col = pred.tune.out)
The tuned polynomial kernel does far worse, assigning every point to class 1:
table(Pred = pred.tune.out, Truth = Train.Data.3$y)
##     Truth
## Pred   0   1
##    0   0   0
##    1 187 213
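The same held-out check for the polynomial fit (again a sketch): since it predicts a single class, the test table is equally degenerate.

table(Pred = predict(tune.out$best.model, Test.Data.3), Truth = Test.Data.3$y)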
library(ISLR)
library(dplyr)
auto <- Auto[, -c(9)]  # drop the name column
head(auto)
##   mpg cylinders displacement horsepower weight acceleration year origin
## 1  18         8          307        130   3504         12.0   70      1
## 2  15         8          350        165   3693         11.5   70      1
## 3  18         8          318        150   3436         11.0   70      1
## 4  16         8          304        150   3433         12.0   70      1
## 5  17         8          302        140   3449         10.5   70      1
## 6  15         8          429        198   4341         10.0   70      1
# Binary target: 1 if mpg is above the median, 0 otherwise; then drop mpg itself
auto <- auto %>%
  mutate(mpgbn = ifelse(mpg > median(mpg), 1, 0))
auto$mpgbn <- as.factor(auto$mpgbn)
auto <- auto[, -1]
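A quick balance check (our own addition): a median split should give roughly equal class counts.

table(auto$mpgbn)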
library(caret)
set.seed(30)
i.F.Auto <- createFolds(y = auto$mpgbn, k = 10, list = TRUE)
poly.error.rate <- matrix(NA, 10, 100)
cost <- seq(0.01, 10, length.out = 100)
degree <- seq(0.01, 10, length.out = 100)
gamma <- seq(0.01, 10, length.out = 100)
for (i in 1:10) {
  # createFolds() returns the held-out indices, so train on the complement
  train.dat <- auto[-i.F.Auto[[i]], ]
  cv.dat <- auto[i.F.Auto[[i]], ]
  for (j in 1:100) {
    svm.fit <- svm(mpgbn ~ ., data = train.dat, kernel = "polynomial",
                   degree = degree[j], cost = 0.01, scale = TRUE)
    pred <- predict(svm.fit, cv.dat)
    tb <- table(pred, cv.dat$mpgbn)
    # error rate = 1 - accuracy; the correct counts sit on the diagonal
    poly.error.rate[i, j] <- 1 - ((tb[1, 1] + tb[2, 2]) / sum(tb))
  }
}
cv.poly.error <- apply(poly.error.rate, 2, mean)
round(cv.poly.error[which.min(cv.poly.error)], digits = 3); degree[which.min(cv.poly.error)]
## [1] 0.625
## [1] 1.019091
rd.error.rate <- matrix(NA, 10, 100)
# Tune cost for the radial kernel first; gamma is swept manually below
tune.out <- tune(svm, mpgbn ~ ., data = auto,
                 kernel = "radial",
                 ranges = list(cost = cost))
for (i in 1:10) {
  train.dat <- auto[-i.F.Auto[[i]], ]
  cv.dat <- auto[i.F.Auto[[i]], ]
  for (j in 1:100) {
    # cost = 0.6154545 is the best cost from the tune() call above
    svm.fit <- svm(mpgbn ~ ., data = train.dat, kernel = "radial",
                   gamma = gamma[j], cost = 0.6154545, scale = TRUE)
    pred <- predict(svm.fit, cv.dat)
    tb <- table(pred, cv.dat$mpgbn)
    rd.error.rate[i, j] <- 1 - ((tb[1, 1] + tb[2, 2]) / sum(tb))
  }
}
rd.cv.Error <- apply(rd.error.rate,2,mean)
round(rd.cv.Error[which.min(rd.cv.Error)],digits = 3); gamma[which.min(rd.cv.Error)]
## [1] 0.524
## [1] 1.523636
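To see how sensitive the radial kernel is to gamma, the CV error curve over the grid can be plotted (a sketch using the objects defined above; not in the original output):

plot(gamma, rd.cv.Error, type = 'l', xlab = 'gamma', ylab = 'CV error rate')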
The cross-validated error rates and the corresponding tuning parameter for each kernel:

| Kernel | CV error | Degree/Gamma |
|--------|----------|--------------|
| Radial | 0.524    | 1.523636     |
| Poly   | 0.625    | 1.019091     |
poly.svm.best <- svm(mpgbn ~ ., data = auto, kernel = "polynomial",
                     cost = 0.01, degree = 1.019091, scale = TRUE)
# Use the best cost and gamma found above for the radial fit
rd.svm.best <- svm(mpgbn ~ ., data = auto, kernel = "radial",
                   cost = 0.6154545, gamma = 1.523636, scale = TRUE)
par(mfrow = c(1, 2))
plot(poly.svm.best, auto, displacement ~ weight)
plot(rd.svm.best, auto, displacement ~ weight)
oj <- OJ
i.Train.o <- createDataPartition(y = oj$Purchase, p = 800/nrow(oj), list = FALSE)
Train.o <- oj[i.Train.o, ]
Test.o <- oj[-i.Train.o, ]
oj.svm.fit <- svm(Purchase ~ ., data = Train.o, cost = 0.01,
                  kernel = 'linear')
summary(oj.svm.fit)
##
## Call:
## svm(formula = Purchase ~ ., data = Train.o, cost = 0.01, kernel = "linear")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
## gamma: 0.05555556
##
## Number of Support Vectors: 448
##
## ( 224 224 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
tr.pred <- predict(oj.svm.fit)
ts.pred <- predict(oj.svm.fit, newdata = Test.o)
Tr.table <- table(Predict = tr.pred, Truth = Train.o$Purchase)
Ts.table <- table(Predict = ts.pred, Truth = Test.o$Purchase)
(Tr.table[1,2]+Tr.table[2,1])/sum(Tr.table); (Ts.table[1,2]+Ts.table[2,1])/sum(Ts.table)
## [1] 0.1735331
## [1] 0.1375465
# NB: seq(0.01, 10, 100) steps by 100 and returns only 0.01; use a 100-point grid
cost <- seq(0.01, 10, length.out = 100)
tune.oj.svm.fit <- tune(svm, Purchase~., data = oj,
kernel = 'linear',
ranges = list(cost = cost))
tune.oj.svm.fit$best.model
##
## Call:
## best.tune(method = svm, train.x = Purchase ~ ., data = oj, ranges = list(cost = cost),
## kernel = "linear")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
## gamma: 0.05555556
##
## Number of Support Vectors: 560
The tuned cost comes back as 0.01, the same value used above, so the training and test error rates are unchanged.
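Had the tuned cost differed, the check would be straightforward (a hedged sketch; best.lin is our own name, and note this model was tuned on the full oj data rather than Train.o):

best.lin <- tune.oj.svm.fit$best.model
mean(predict(best.lin, Train.o) != Train.o$Purchase)  # training error
mean(predict(best.lin, Test.o) != Test.o$Purchase)    # test error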
tune.oj.rd.svm.fit <- tune(svm, Purchase~., data = Train.o,
kernel = 'radial',
ranges = list(cost = cost))
tr.rd.pred <- predict(tune.oj.rd.svm.fit$best.model, Train.o)
ts.rd.pred <- predict(tune.oj.rd.svm.fit$best.model, Test.o)
table(pred = tr.rd.pred, truth = Train.o$Purchase)
##     truth
## pred  CH  MM
##   CH 489 312
##   MM   0   0
table(pred = ts.rd.pred, truth = Test.o$Purchase)
##     truth
## pred  CH  MM
##   CH 164 105
##   MM   0   0
The radial kernel predicts CH for every observation, so it is a poor fit here.
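The error rates make the failure explicit (a minimal sketch, not in the original output):

mean(tr.rd.pred != Train.o$Purchase)  # equals the MM base rate, since every prediction is CH
mean(ts.rd.pred != Test.o$Purchase)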
tune.oj.poly.svm.fit <- tune(svm, Purchase ~ ., data = Train.o,
                             kernel = 'polynomial',
                             ranges = list(cost = cost),
                             degree = 2)
tr.poly.pred <- predict(tune.oj.poly.svm.fit$best.model, Train.o)
ts.poly.pred <- predict(tune.oj.poly.svm.fit$best.model, Test.o)
table(pred = tr.poly.pred, truth = Train.o$Purchase)
##     truth
## pred  CH  MM
##   CH 489 312
##   MM   0   0
table(pred = ts.poly.pred, truth = Test.o$Purchase)
##     truth
## pred  CH  MM
##   CH 164 105
##   MM   0   0
The degree-2 polynomial fit is just as degenerate; the linear kernel is clearly the best choice for this data.
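A side-by-side comparison of test error rates (our own sketch, reusing the prediction vectors above) makes the point concrete:

mean(ts.pred != Test.o$Purchase)       # linear
mean(ts.rd.pred != Test.o$Purchase)    # radial
mean(ts.poly.pred != Test.o$Purchase)  # polynomial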