library(e1071)
library(ISLR)
set.seed(1)
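# Simulate 500 observations with a non-linear (quadratic) class boundary:
# y = 1 when X1^2 > X2^2, i.e. when |X1| > |X2|.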
X1 <- runif(500) - 0.5
X2 <- runif(500) - 0.5
y <- 1 * (X1^2 - X2^2 > 0)
plot(X1[y == 0], X2[y == 0], col = "orange", xlab = "X1", ylab = "X2")
points(X1[y == 1], X2[y == 1], col = "blue")
GLM <- glm(y ~ X1 + X2, family = binomial)
summary(GLM)
##
## Call:
## glm(formula = y ~ X1 + X2, family = binomial)
##
## Deviance Residuals:
##    Min      1Q  Median      3Q     Max
## -1.179  -1.139  -1.112   1.206   1.257
##
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.087260   0.089579  -0.974    0.330
## X1           0.196199   0.316864   0.619    0.536
## X2          -0.002854   0.305712  -0.009    0.993
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 692.18 on 499 degrees of freedom
## Residual deviance: 691.79 on 497 degrees of freedom
## AIC: 697.79
##
## Number of Fisher Scoring iterations: 3
data <- data.frame(X1 = X1, X2 = X2, y = y)
GLM.prob <- predict(GLM, data, type = "response")
GLM.pred <- ifelse(GLM.prob > 0.5, 1, 0)
data.pos <- data[GLM.pred == 1, ]
data.neg <- data[GLM.pred == 0, ]
plot(data.pos$X1, data.pos$X2, col = "blue", xlab = "X1", ylab = "X2")
points(data.neg$X1, data.neg$X2, col = "orange")
GLM1 <- glm(y ~ poly(X1, 2) + poly(X2, 2) + I(X1 * X2), data = data, family = "binomial")
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(GLM1)
##
## Call:
## glm(formula = y ~ poly(X1, 2) + poly(X2, 2) + I(X1 * X2), family = "binomial",
## data = data)
##
## Deviance Residuals:
##        Min          1Q      Median          3Q         Max
## -8.240e-04  -2.000e-08  -2.000e-08   2.000e-08   1.163e-03
##
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept)     -102.2     4302.0  -0.024    0.981
## poly(X1, 2)1    2715.3   141109.5   0.019    0.985
## poly(X1, 2)2   27218.5   842987.2   0.032    0.974
## poly(X2, 2)1    -279.7    97160.4  -0.003    0.998
## poly(X2, 2)2  -28693.0   875451.3  -0.033    0.974
## I(X1 * X2)      -206.4    41802.8  -0.005    0.996
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6.9218e+02 on 499 degrees of freedom
## Residual deviance: 3.5810e-06 on 494 degrees of freedom
## AIC: 12
##
## Number of Fisher Scoring iterations: 25
GLM1.prob <- predict(GLM1, data, type = "response")
GLM1.pred <- ifelse(GLM1.prob > 0.5, 1, 0)
data.pos <- data[GLM1.pred == 1, ]
data.neg <- data[GLM1.pred == 0, ]
plot(data.pos$X1, data.pos$X2, col = "blue", xlab = "X1", ylab = "X2")
points(data.neg$X1, data.neg$X2, col = "orange")
SVM <- svm(as.factor(y) ~ X1 + X2, data = data, kernel = "linear", cost = 0.1)
SVM.pred <- predict(SVM, data)
SVM.pos <- data[SVM.pred == 1, ]
SVM.neg <- data[SVM.pred == 0, ]
plot(SVM.pos$X1, SVM.pos$X2, col = "blue", xlab = "X1", ylab = "X2")
points(SVM.neg$X1, SVM.neg$X2, col = "orange")
SVM2 <- svm(as.factor(y) ~ X1 + X2, data = data, kernel = "radial", gamma = 1, cost = 1)
SVM2.pred <- predict(SVM2, data)
SVM2.pos <- data[SVM2.pred == 1, ]
SVM2.neg <- data[SVM2.pred == 0, ]
plot(SVM2.pos$X1, SVM2.pos$X2, col = "blue", xlab = "X1", ylab = "X2")
points(SVM2.neg$X1, SVM2.neg$X2, col = "orange")
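To quantify what the four plots suggest, the training error rate of each classifier can be compared. The short sketch below uses the objects created above; the helper err() is introduced here only for illustration.
# Sketch (not in the original analysis): training misclassification rates.
err <- function(pred) mean(pred != y)
c(logit_linear = err(GLM.pred),
  logit_poly   = err(GLM1.pred),
  svm_linear   = err(as.numeric(as.character(SVM.pred))),
  svm_radial   = err(as.numeric(as.character(SVM2.pred))))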
attach(Auto)
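# Create a binary response: 1 if a car's mpg is above the median, 0 otherwise.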
gas_median <- median(Auto$mpg)
new_var <- ifelse(Auto$mpg > gas_median, 1, 0)
Auto$mpglevel <- as.factor(new_var)
tune.out <- tune(svm, mpglevel ~ ., data = Auto, kernel = "linear",
                 ranges = list(cost = c(0.01, 0.1, 1, 5, 10)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.01269231
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.07403846 0.04757358
## 2 0.10 0.05608974 0.04642730
## 3 1.00 0.01269231 0.01783081
## 4 5.00 0.01782051 0.01703462
## 5 10.00 0.02038462 0.01594939
tune.out <- tune(svm, mpglevel ~ ., data = Auto, kernel = "radial",
                 ranges = list(cost = c(0.1, 1, 5, 10), gamma = c(0.1, 1, 5, 10)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 10 0.1
##
## - best performance: 0.03070513
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 0.1 0.1 0.07903846 0.04582628
## 2 1.0 0.1 0.05628205 0.04813795
## 3 5.0 0.1 0.03070513 0.02024498
## 4 10.0 0.1 0.03070513 0.02649650
## 5 0.1 1.0 0.54070513 0.03076812
## 6 1.0 1.0 0.06121795 0.03435755
## 7 5.0 1.0 0.06115385 0.03212840
## 8 10.0 1.0 0.06115385 0.03212840
## 9 0.1 5.0 0.54070513 0.03076812
## 10 1.0 5.0 0.47435897 0.06395996
## 11 5.0 5.0 0.47429487 0.06885546
## 12 10.0 5.0 0.47429487 0.06885546
## 13 0.1 10.0 0.54070513 0.03076812
## 14 1.0 10.0 0.49980769 0.05307070
## 15 5.0 10.0 0.49724359 0.05299150
## 16 10.0 10.0 0.49724359 0.05299150
tune.out <- tune(svm, mpglevel ~ ., data = Auto, kernel = "polynomial",
                 ranges = list(cost = c(0.1, 1, 5, 10), degree = c(2, 3, 4)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 10 2
##
## - best performance: 0.5589744
##
## - Detailed performance results:
## cost degree error dispersion
## 1 0.1 2 0.5691026 0.03113623
## 2 1.0 2 0.5691026 0.03113623
## 3 5.0 2 0.5691026 0.03113623
## 4 10.0 2 0.5589744 0.03735346
## 5 0.1 3 0.5691026 0.03113623
## 6 1.0 3 0.5691026 0.03113623
## 7 5.0 3 0.5691026 0.03113623
## 8 10.0 3 0.5691026 0.03113623
## 9 0.1 4 0.5691026 0.03113623
## 10 1.0 4 0.5691026 0.03113623
## 11 5.0 4 0.5691026 0.03113623
## 12 10.0 4 0.5691026 0.03113623
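Note that tune() also stores the model refit on the full data at the best parameter combination, so the fits below could be taken straight from best.model rather than refit by hand. A minimal sketch for the last tune.out (the polynomial run):
# The tune object keeps the refit at the best cross-validated parameters.
best.poly <- tune.out$best.model
summary(best.poly)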
SVM.lin <- svm(mpglevel ~ ., data = Auto, kernel = "linear", cost = 1)
SVM.rad <- svm(mpglevel ~ ., data = Auto, kernel = "radial", cost = 5, gamma = 0.1)
SVM.poly <- svm(mpglevel ~ ., data = Auto, kernel = "polynomial", cost = 10, degree = 2)
# Plot each fitted SVM against mpg and every remaining predictor in Auto.
plotpairs <- function(autofit) {
  for (name in names(Auto)[!(names(Auto) %in% c("mpg", "mpglevel", "name"))]) {
    plot(autofit, Auto, as.formula(paste("mpg~", name, sep = "")))
  }
}
plotpairs(SVM.lin)
plotpairs(SVM.rad)
plotpairs(SVM.poly)
attach(OJ)
set.seed(1)
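# Randomly assign 800 of the OJ observations to the training set;
# the remaining rows form the test set.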
train <- sample(dim(OJ)[1], 800)
OJ.train <- OJ[train, ]
OJ.test <- OJ[-train, ]
SVM.lin <- svm(Purchase ~ ., kernel = "linear", data = OJ.train, cost = 0.01)
summary(SVM.lin)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ.train, kernel = "linear", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 435
##
## ( 219 216 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
SVM.pred <- predict(SVM.lin, OJ.train)
table(OJ.train$Purchase, SVM.pred)
##      SVM.pred
##        CH  MM
##   CH  420  65
##   MM   75 240
(65 + 75) / (420 + 65 + 75 + 240)  # training error rate
## [1] 0.175
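The same calculation on the held-out observations gives the test error rate; a minimal sketch (output not shown here):
# Sketch: confusion matrix and misclassification rate on OJ.test.
SVM.pred.test <- predict(SVM.lin, OJ.test)
table(OJ.test$Purchase, SVM.pred.test)
mean(SVM.pred.test != OJ.test$Purchase)  # test error rate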