library(e1071)
library(ISLR)
set.seed(1)
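# Simulate 500 observations with a non-linear (quadratic) class boundary:
# y = 1 when X1^2 > X2^2, i.e. when |X1| > |X2|.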
X1 <- runif(500) - 0.5
X2 <- runif(500) - 0.5
y <- 1 * (X1^2 - X2^2 > 0)
plot(X1[y == 0], X2[y == 0], col = "orange", xlab = "X1", ylab = "X2")
points(X1[y == 1], X2[y == 1], col = "blue")
GLM <- glm(y ~ X1 + X2, family = binomial)
summary(GLM)
##
## Call:
## glm(formula = y ~ X1 + X2, family = binomial)
##
## Deviance Residuals:
##    Min      1Q  Median      3Q     Max
## -1.179  -1.139  -1.112   1.206   1.257
##
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.087260   0.089579  -0.974    0.330
## X1           0.196199   0.316864   0.619    0.536
## X2          -0.002854   0.305712  -0.009    0.993
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 692.18 on 499 degrees of freedom
## Residual deviance: 691.79 on 497 degrees of freedom
## AIC: 697.79
##
## Number of Fisher Scoring iterations: 3
data <- data.frame(X1 = X1, X2 = X2, y = y)
GLM.prob <- predict(GLM, data, type = "response")
GLM.pred <- ifelse(GLM.prob > 0.5, 1, 0)
data.pos <- data[GLM.pred == 1, ]
data.neg <- data[GLM.pred == 0, ]
plot(data.pos$X1, data.pos$X2, col = "blue", xlab = "X1", ylab = "X2")
points(data.neg$X1, data.neg$X2, col = "orange")
GLM1 <- glm(y ~ poly(X1, 2) + poly(X2, 2) + I(X1 * X2), data = data, family = "binomial")
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(GLM1)
##
## Call:
## glm(formula = y ~ poly(X1, 2) + poly(X2, 2) + I(X1 * X2), family = "binomial",
## data = data)
##
## Deviance Residuals:
##        Min          1Q      Median          3Q         Max
## -8.240e-04  -2.000e-08  -2.000e-08   2.000e-08   1.163e-03
##
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept)     -102.2     4302.0  -0.024    0.981
## poly(X1, 2)1    2715.3   141109.5   0.019    0.985
## poly(X1, 2)2   27218.5   842987.2   0.032    0.974
## poly(X2, 2)1    -279.7    97160.4  -0.003    0.998
## poly(X2, 2)2  -28693.0   875451.3  -0.033    0.974
## I(X1 * X2)      -206.4    41802.8  -0.005    0.996
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6.9218e+02 on 499 degrees of freedom
## Residual deviance: 3.5810e-06 on 494 degrees of freedom
## AIC: 12
##
## Number of Fisher Scoring iterations: 25
GLM1.prob <- predict(GLM1, data, type = "response")
GLM1.pred <- ifelse(GLM1.prob > 0.5, 1, 0)
data.pos <- data[GLM1.pred == 1, ]
data.neg <- data[GLM1.pred == 0, ]
plot(data.pos$X1, data.pos$X2, col = "blue", xlab = "X1", ylab = "X2")
points(data.neg$X1, data.neg$X2, col = "orange")
SVM <- svm(as.factor(y) ~ X1 + X2, data = data, kernel = "linear", cost = 0.1)
SVM.pred <- predict(SVM, data)
SVM.pos <- data[SVM.pred == 1, ]
SVM.neg <- data[SVM.pred == 0, ]
plot(SVM.pos$X1, SVM.pos$X2, col = "blue", xlab = "X1", ylab = "X2")
points(SVM.neg$X1, SVM.neg$X2, col = "orange")
SVM2 <- svm(as.factor(y) ~ X1 + X2, data = data, kernel = "radial", gamma = 1, cost = 1)
SVM2.pred <- predict(SVM2, data)
SVM2.pos <- data[SVM2.pred == 1, ]
SVM2.neg <- data[SVM2.pred == 0, ]
plot(SVM2.pos$X1, SVM2.pos$X2, col = "blue", xlab = "X1", ylab = "X2")
points(SVM2.neg$X1, SVM2.neg$X2, col = "orange")
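To quantify what the four plots suggest, the training error rate of each classifier can be compared. The short sketch below uses the objects created above; the helper err() is introduced here only for illustration.
# Sketch (not in the original analysis): training misclassification rates.
err <- function(pred) mean(pred != y)
c(logit_linear = err(GLM.pred),
  logit_poly   = err(GLM1.pred),
  svm_linear   = err(as.numeric(as.character(SVM.pred))),
  svm_radial   = err(as.numeric(as.character(SVM2.pred))))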
attach(Auto)
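# Create a binary response: 1 if a car's mpg is above the median, 0 otherwise.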
gas_median <- median(Auto$mpg)
new_var <- ifelse(Auto$mpg > gas_median, 1, 0)
Auto$mpglevel <- as.factor(new_var)
tune.out <- tune(svm, mpglevel ~ ., data = Auto, kernel = "linear",
                 ranges = list(cost = c(0.01, 0.1, 1, 5, 10)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.01269231
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.07403846 0.04757358
## 2 0.10 0.05608974 0.04642730
## 3 1.00 0.01269231 0.01783081
## 4 5.00 0.01782051 0.01703462
## 5 10.00 0.02038462 0.01594939
tune.out <- tune(svm, mpglevel ~ ., data = Auto, kernel = "radial",
                 ranges = list(cost = c(0.1, 1, 5, 10), gamma = c(0.1, 1, 5, 10)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 10 0.1
##
## - best performance: 0.03070513
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 0.1 0.1 0.07903846 0.04582628
## 2 1.0 0.1 0.05628205 0.04813795
## 3 5.0 0.1 0.03070513 0.02024498
## 4 10.0 0.1 0.03070513 0.02649650
## 5 0.1 1.0 0.54070513 0.03076812
## 6 1.0 1.0 0.06121795 0.03435755
## 7 5.0 1.0 0.06115385 0.03212840
## 8 10.0 1.0 0.06115385 0.03212840
## 9 0.1 5.0 0.54070513 0.03076812
## 10 1.0 5.0 0.47435897 0.06395996
## 11 5.0 5.0 0.47429487 0.06885546
## 12 10.0 5.0 0.47429487 0.06885546
## 13 0.1 10.0 0.54070513 0.03076812
## 14 1.0 10.0 0.49980769 0.05307070
## 15 5.0 10.0 0.49724359 0.05299150
## 16 10.0 10.0 0.49724359 0.05299150
tune.out <- tune(svm, mpglevel ~ ., data = Auto, kernel = "polynomial",
                 ranges = list(cost = c(0.1, 1, 5, 10), degree = c(2, 3, 4)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 10 2
##
## - best performance: 0.5589744
##
## - Detailed performance results:
## cost degree error dispersion
## 1 0.1 2 0.5691026 0.03113623
## 2 1.0 2 0.5691026 0.03113623
## 3 5.0 2 0.5691026 0.03113623
## 4 10.0 2 0.5589744 0.03735346
## 5 0.1 3 0.5691026 0.03113623
## 6 1.0 3 0.5691026 0.03113623
## 7 5.0 3 0.5691026 0.03113623
## 8 10.0 3 0.5691026 0.03113623
## 9 0.1 4 0.5691026 0.03113623
## 10 1.0 4 0.5691026 0.03113623
## 11 5.0 4 0.5691026 0.03113623
## 12 10.0 4 0.5691026 0.03113623
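Note that tune() also stores the model refit on the full data at the best parameter combination, so the fits below could be taken straight from best.model rather than refit by hand. A minimal sketch for the last tune.out (the polynomial run):
# The tune object keeps the refit at the best cross-validated parameters.
best.poly <- tune.out$best.model
summary(best.poly)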
SVM.lin <- svm(mpglevel ~ ., data = Auto, kernel = "linear", cost = 1)
SVM.rad <- svm(mpglevel ~ ., data = Auto, kernel = "radial", cost = 5, gamma = 0.1)
SVM.poly <- svm(mpglevel ~ ., data = Auto, kernel = "polynomial", cost = 10, degree = 2)
# Plot each fitted SVM against mpg and every remaining predictor in Auto.
plotpairs <- function(autofit) {
  for (name in names(Auto)[!(names(Auto) %in% c("mpg", "mpglevel", "name"))]) {
    plot(autofit, Auto, as.formula(paste("mpg~", name, sep = "")))
  }
}
plotpairs(SVM.lin)
plotpairs(SVM.rad)
plotpairs(SVM.poly)
attach(OJ)
set.seed(1)
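# Randomly assign 800 of the OJ observations to the training set;
# the remaining rows form the test set.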
train <- sample(dim(OJ)[1], 800)
OJ.train <- OJ[train, ]
OJ.test <- OJ[-train, ]
SVM.lin <- svm(Purchase ~ ., kernel = "linear", data = OJ.train, cost = 0.01)
summary(SVM.lin)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ.train, kernel = "linear", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 435
##
## ( 219 216 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
SVM.pred <- predict(SVM.lin, OJ.train)
table(OJ.train$Purchase, SVM.pred)
##      SVM.pred
##        CH  MM
##   CH  420  65
##   MM   75 240
(65 + 75) / (420 + 65 + 75 + 240)  # training error rate
## [1] 0.175
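The same calculation on the held-out observations gives the test error rate; a minimal sketch (output not shown here):
# Sketch: confusion matrix and misclassification rate on OJ.test.
SVM.pred.test <- predict(SVM.lin, OJ.test)
table(OJ.test$Purchase, SVM.pred.test)
mean(SVM.pred.test != OJ.test$Purchase)  # test error rate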