set.seed(1)
x1 = runif(500) -0.5
x2 = runif(500) -0.5
y = 1*(x1^2-x2^2 > 0)
plot(x1,x2,xlab = "X1",ylab = "X2", col = (3-y),pch = (2-y))
logreg.fit<-glm(y~x1+x2, family = binomial)
summary(logreg.fit)
##
## Call:
## glm(formula = y ~ x1 + x2, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.179 -1.139 -1.112 1.206 1.257
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.087260 0.089579 -0.974 0.330
## x1 0.196199 0.316864 0.619 0.536
## x2 -0.002854 0.305712 -0.009 0.993
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 692.18 on 499 degrees of freedom
## Residual deviance: 691.79 on 497 degrees of freedom
## AIC: 697.79
##
## Number of Fisher Scoring iterations: 3
data = data.frame(x1=x1,x2=x2,y=y)
probs = predict(logreg.fit,data, type = "response")
preds = rep(0,500)
preds[probs>.47]= 1
plot(data[preds==1, ]$x1, data[preds==1,]$x2, col = (3-1),pch = (2-1), xlab = "X1",ylab = "X2")
points(data[preds==0,]$x1,data[preds==0,]$x2,col=(3-0),pch = (2-0))
nlm.fit<-glm(y~poly(x1,2)+poly(x2,2)+I(x1*x2), family = "binomial")
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(nlm.fit)
##
## Call:
## glm(formula = y ~ poly(x1, 2) + poly(x2, 2) + I(x1 * x2), family = "binomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.240e-04 -2.000e-08 -2.000e-08 2.000e-08 1.163e-03
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -102.2 4302.0 -0.024 0.981
## poly(x1, 2)1 2715.3 141109.5 0.019 0.985
## poly(x1, 2)2 27218.5 842987.2 0.032 0.974
## poly(x2, 2)1 -279.7 97160.4 -0.003 0.998
## poly(x2, 2)2 -28693.0 875451.3 -0.033 0.974
## I(x1 * x2) -206.4 41802.8 -0.005 0.996
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6.9218e+02 on 499 degrees of freedom
## Residual deviance: 3.5810e-06 on 494 degrees of freedom
## AIC: 12
##
## Number of Fisher Scoring iterations: 25
probs = predict(nlm.fit, data, type = "response")
preds = rep(0,500)
preds[probs>.47]=1
plot(data[preds==1,]$x1, data[preds == 1, ]$x2,col = (3-1),pch = (2-1),xlab = "X1",ylab = "X2")
points(data[preds == 0,]$x1,data[preds == 0,]$x2,col = (3-0),pch = (2-0))
library(e1071)
## Warning: package 'e1071' was built under R version 3.5.3
data$y = as.factor(data$y)
supvec.fit<-svm(y~x1+x2,data,kernel = "linear", cost = 0.01)
preds= predict(supvec.fit, data)
plot(data[preds==0,]$x1, data[preds == 0, ]$x2,col = (3-0),pch = (2-0),xlab = "X1",ylab = "X2")
points(data[preds==1,]$x1, data[preds==1,]$x2,col = (3-1),pch = (2-1))
data$y = as.factor(data$y)
supvet.nonlm<-svm(y~x1+x2, data, kernel = "radial", gamma = 1)
preds = predict(supvet.nonlm)
plot(data[preds==0,]$x1, data[preds == 0, ]$x2,col = (3-0),pch = (2-0),xlab = "X1",ylab = "X2")
points(data[preds==1,]$x1, data[preds==1,]$x2, col=(3-1),pch=(2-1))
The support vector machine with a non-linear (radial) kernel and the logistic regression with quadratic and interaction terms produce roughly the same decision boundary, and both recover the true non-linear boundary well. In contrast, the support vector classifier with a linear kernel and the logistic regression without the non-linear terms both fail to produce a non-linear decision boundary.
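As a rough numerical check on that comparison, here is a minimal sketch (reusing the fits, the data frame, and the 0.47 cutoff from above, with a small helper err() defined only for this purpose) that computes the training error rate of each of the four classifiers:
err <- function(pred) mean(pred != y)                                    # y is the original 0/1 response
err(ifelse(predict(logreg.fit, data, type = "response") > 0.47, 1, 0))   # logistic regression, linear terms only
err(ifelse(predict(nlm.fit, data, type = "response") > 0.47, 1, 0))      # logistic regression with quadratic/interaction terms
err(as.numeric(as.character(predict(supvec.fit, data))))                 # support vector classifier, linear kernel
err(as.numeric(as.character(predict(supvet.nonlm, data))))               # SVM, radial kernel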
library(ISLR)
## Warning: package 'ISLR' was built under R version 3.5.3
var = ifelse(Auto$mpg>median(Auto$mpg),1,0)
Auto$mpglvl<-as.factor(var)
tune.fit<- tune(svm,mpglvl~.,data= Auto, kernel = "linear", ranges = list(cost = c(.01,.1,1,5,10,100,1000)))
summary(tune.fit)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.01269231
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-02 0.07397436 0.04254681
## 2 1e-01 0.04852564 0.04419726
## 3 1e+00 0.01269231 0.01783081
## 4 5e+00 0.02038462 0.02337923
## 5 1e+01 0.02551282 0.02948687
## 6 1e+02 0.03314103 0.02424635
## 7 1e+03 0.03314103 0.02424635
# polynomial
tune.fit1<- tune(svm,mpglvl~.,data= Auto, kernel = "polynomial", ranges = list(cost = c(.01,.1,1,5,10,100,1000), degree = c(2,3,4)))
summary(tune.fit1)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 1000 2
##
## - best performance: 0.239359
##
## - Detailed performance results:
## cost degree error dispersion
## 1 1e-02 2 0.5482692 0.04501204
## 2 1e-01 2 0.5482692 0.04501204
## 3 1e+00 2 0.5482692 0.04501204
## 4 5e+00 2 0.5482692 0.04501204
## 5 1e+01 2 0.5177564 0.09019628
## 6 1e+02 2 0.3008974 0.04970069
## 7 1e+03 2 0.2393590 0.07445666
## 8 1e-02 3 0.5482692 0.04501204
## 9 1e-01 3 0.5482692 0.04501204
## 10 1e+00 3 0.5482692 0.04501204
## 11 5e+00 3 0.5482692 0.04501204
## 12 1e+01 3 0.5482692 0.04501204
## 13 1e+02 3 0.3414744 0.07661265
## 14 1e+03 3 0.2523077 0.04844654
## 15 1e-02 4 0.5482692 0.04501204
## 16 1e-01 4 0.5482692 0.04501204
## 17 1e+00 4 0.5482692 0.04501204
## 18 5e+00 4 0.5482692 0.04501204
## 19 1e+01 4 0.5482692 0.04501204
## 20 1e+02 4 0.5482692 0.04501204
## 21 1e+03 4 0.5277564 0.08083819
#radial
tune.fit2<- tune(svm,mpglvl~.,data= Auto, kernel = "radial", ranges = list(cost = c(.01,.1,1,5,10,100,1000), gamma = c(.01,.1,1,5,10,100)))
summary(tune.fit2)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 100 0.01
##
## - best performance: 0.01269231
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 1e-02 1e-02 0.56358974 0.03908890
## 2 1e-01 1e-02 0.08891026 0.05667837
## 3 1e+00 1e-02 0.07108974 0.05507886
## 4 5e+00 1e-02 0.04570513 0.04255952
## 5 1e+01 1e-02 0.02282051 0.03230837
## 6 1e+02 1e-02 0.01269231 0.01338091
## 7 1e+03 1e-02 0.02025641 0.02319375
## 8 1e-02 1e-01 0.19839744 0.08851997
## 9 1e-01 1e-01 0.07871795 0.06181517
## 10 1e+00 1e-01 0.05070513 0.05424359
## 11 5e+00 1e-01 0.02294872 0.02534336
## 12 1e+01 1e-01 0.02294872 0.02243651
## 13 1e+02 1e-01 0.02288462 0.01870128
## 14 1e+03 1e-01 0.02288462 0.01870128
## 15 1e-02 1e+00 0.56358974 0.03908890
## 16 1e-01 1e+00 0.56358974 0.03908890
## 17 1e+00 1e+00 0.06083333 0.05676402
## 18 5e+00 1e+00 0.05826923 0.05906290
## 19 1e+01 1e+00 0.05826923 0.05906290
## 20 1e+02 1e+00 0.05826923 0.05906290
## 21 1e+03 1e+00 0.05826923 0.05906290
## 22 1e-02 5e+00 0.56358974 0.03908890
## 23 1e-01 5e+00 0.56358974 0.03908890
## 24 1e+00 5e+00 0.48948718 0.05785842
## 25 5e+00 5e+00 0.48185897 0.06381563
## 26 1e+01 5e+00 0.48185897 0.06381563
## 27 1e+02 5e+00 0.48185897 0.06381563
## 28 1e+03 5e+00 0.48185897 0.06381563
## 29 1e-02 1e+01 0.56358974 0.03908890
## 30 1e-01 1e+01 0.56358974 0.03908890
## 31 1e+00 1e+01 0.50987179 0.06021184
## 32 5e+00 1e+01 0.50217949 0.05475358
## 33 1e+01 1e+01 0.50217949 0.05475358
## 34 1e+02 1e+01 0.50217949 0.05475358
## 35 1e+03 1e+01 0.50217949 0.05475358
## 36 1e-02 1e+02 0.56358974 0.03908890
## 37 1e-01 1e+02 0.56358974 0.03908890
## 38 1e+00 1e+02 0.56358974 0.03908890
## 39 5e+00 1e+02 0.56358974 0.03908890
## 40 1e+01 1e+02 0.56358974 0.03908890
## 41 1e+02 1e+02 0.56358974 0.03908890
## 42 1e+03 1e+02 0.56358974 0.03908890
For the radial kernel, the lowest cross-validation error (about 0.0127) is achieved at gamma = 0.01 with cost = 100, which matches the best error obtained with the linear kernel (cost = 1). The polynomial kernel does much worse, with a best error of about 0.239 at cost = 1000 and degree = 2.
Hint from the text: instead of typing > plot(svmfit, dat), where svmfit contains your fitted model and dat is a data frame containing your data, you can type > plot(svmfit, dat, x1 ~ x4) in order to plot just the first and fourth variables. However, you must replace x1 and x4 with the correct variable names. To find out more, type ?plot.svm.
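A minimal sketch of that idea applied to the tuned radial fit above (assuming tune.fit2 is still in the workspace; the variable pair is only an example, and the remaining predictors are held at plot.svm's default slice values, so you may prefer to pass an explicit slice list or drop factors such as name from the model):
bestmod <- tune.fit2$best.model                  # best radial SVM found by tune()
plot(bestmod, Auto, weight ~ horsepower)         # decision regions over two of the predictors
plot(bestmod, Auto, acceleration ~ displacement) # another pair, examined the same way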
set.seed(1)
train = sample(nrow(OJ),800)
OJ.train = OJ[train,]
OJ.test = OJ[-train,]
# note: "kernal" is misspelled, so svm() ignores it and falls back to its default radial kernel (see the summary below); the intended argument is kernel = "linear"
svm.linear = svm(Purchase~.,data=OJ.train, kernal = "linear", cost = .01 )
summary(svm.linear)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ.train, kernal = "linear",
## cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 0.01
## gamma: 0.05555556
##
## Number of Support Vectors: 617
##
## ( 306 311 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
print("our SVC created 617 out of 800 ")
## [1] "our SVC created 617 out of 800 "
train.pred<-predict(svm.linear,OJ.train)
table(OJ.train$Purchase,train.pred)
## train.pred
## CH MM
## CH 494 0
## MM 306 0
test.pred = predict(svm.linear,OJ.test)
table(OJ.test$Purchase,test.pred)
## test.pred
## CH MM
## CH 159 0
## MM 111 0
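Because this fit predicts CH for every observation, its error rates are simply the MM proportions in each set; a quick check from the predictions above:
mean(train.pred != OJ.train$Purchase)   # training error: 306/800 = 0.3825
mean(test.pred != OJ.test$Purchase)     # test error: 111/270, about 0.411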
# note: "kernal" is again misspelled, so tune() cross-validates svm() with its default radial kernel rather than the intended linear kernel
tune.out<- tune(svm, Purchase~., data = OJ.train,kernal = "linear", ranges = list(cost = 10^seq(-2,1,by = .25)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.3162278
##
## - best performance: 0.1625
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01000000 0.38250 0.04297932
## 2 0.01778279 0.38250 0.04297932
## 3 0.03162278 0.37250 0.04923018
## 4 0.05623413 0.19875 0.05382908
## 5 0.10000000 0.17875 0.05834821
## 6 0.17782794 0.16750 0.05779514
## 7 0.31622777 0.16250 0.05170697
## 8 0.56234133 0.16375 0.05084358
## 9 1.00000000 0.16375 0.05318012
## 10 1.77827941 0.16375 0.04693746
## 11 3.16227766 0.17125 0.04860913
## 12 5.62341325 0.17125 0.05304937
## 13 10.00000000 0.17125 0.05466120
svm.linear1 = svm(Purchase~., kernel = "linear", data = OJ.train ,cost = tune.out$best.parameters$cost)
train.pred = predict(svm.linear1,OJ.train)
table(OJ.train$Purchase, train.pred)
## train.pred
## CH MM
## CH 438 56
## MM 72 234
(56+72)/ (438+56+72+234)
## [1] 0.16
test.pred<- predict(svm.linear1, OJ.test)
table(OJ.test$Purchase, test.pred)
## test.pred
## CH MM
## CH 140 19
## MM 30 81
(19+30)/(140+19+30+81)
## [1] 0.1814815
With the tuned cost, the training error is 0.16 and the test error is about 0.181, an improvement over the untuned fit.
svm.radial = svm(Purchase~., kernel = "radial", data = OJ.train)
summary(svm.radial)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ.train, kernel = "radial")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
## gamma: 0.05555556
##
## Number of Support Vectors: 379
##
## ( 188 191 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
train.pred= predict(svm.radial, OJ.train)
table(OJ.train$Purchase,train.pred)
## train.pred
## CH MM
## CH 455 39
## MM 77 229
test.pred <- predict(svm.radial,OJ.test)
table(OJ.test$Purchase, test.pred)
## test.pred
## CH MM
## CH 141 18
## MM 28 83
print("here our support vector is 379 out of 800")
## [1] "here our support vector is 379 out of 800"
svm.poly<- svm(Purchase~., data = OJ.train,kernel = "polynomial", degree = 2)
summary(svm.poly)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ.train, kernel = "polynomial",
## degree = 2)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 1
## degree: 2
## gamma: 0.05555556
## coef.0: 0
##
## Number of Support Vectors: 454
##
## ( 224 230 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
train.pred= predict(svm.poly, OJ.train)
table(OJ.train$Purchase,train.pred)
## train.pred
## CH MM
## CH 461 33
## MM 105 201
test.pred <- predict(svm.poly,OJ.test)
table(OJ.test$Purchase, test.pred)
## test.pred
## CH MM
## CH 149 10
## MM 41 70
print("The polynomial gamma support vector is 454 224 belong to CH levels and rest to MM")
## [1] "The polynomial gamma support vector is 454 224 belong to CH levels and rest to MM"
Comparing test error rates, the radial kernel performs best (about 0.170, versus about 0.181 for the tuned linear SVC and 0.189 for the degree-2 polynomial), so the radial kernel is the best method for this data.