R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
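
For example, a minimal chunk using the built-in `cars` data set:

# summarize the built-in cars data set that ships with R
summary(cars)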

###5

###a

x1 <- runif(500) - 0.5
x2 <- runif(500) - 0.5
y <- 1 * (x1^2 - x2^2 > 0)

###b

# plot the two classes: class 0 in red, class 1 in black (default)
plot(x1[y == 0], x2[y == 0], col = "red", xlab = "X1", ylab = "X2")
points(x1[y == 1], x2[y == 1])

###c

lm.fit = glm(y ~ x1 + x2, family = binomial)
summary(lm.fit)
## 
## Call:
## glm(formula = y ~ x1 + x2, family = binomial)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.225  -1.217   1.132   1.137   1.143  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept)  0.096603   0.089862   1.075    0.282
## x1          -0.008582   0.297431  -0.029    0.977
## x2           0.024951   0.307442   0.081    0.935
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 691.99  on 499  degrees of freedom
## Residual deviance: 691.99  on 497  degrees of freedom
## AIC: 697.99
## 
## Number of Fisher Scoring iterations: 3

###d

data = data.frame(x1 = x1, x2 = x2, y = y)
lm.prob = predict(lm.fit, data, type = "response")
lm.pred = ifelse(lm.prob > 0.50, 1, 0)
data.pos = data[lm.pred == 1, ]
data.neg = data[lm.pred == 0, ]
plot(data.pos$x1, data.pos$x2, xlab = "X1", ylab = "X2")
points(data.neg$x1, data.neg$x2, col = "red")

###e

lm.fit = glm(y ~ poly(x1, 2) + poly(x2, 2) + I(x1 * x2), data = data, family = binomial)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

These warnings occur because y is a deterministic function of x1 and x2, so the quadratic terms separate the two classes perfectly and the fitted probabilities are driven to 0 or 1.

###f

lm.prob = predict(lm.fit, data, type = "response")
lm.pred = ifelse(lm.prob > 0.5, 1, 0)
data.pos = data[lm.pred == 1, ]
data.neg = data[lm.pred == 0, ]
plot(data.pos$x1, data.pos$x2, xlab = "X1", ylab = "X2")
points(data.neg$x1, data.neg$x2, col = "red")

###g
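
Part (g) parallels parts (d) and (f), but with a support vector classifier in place of logistic regression. A minimal sketch with e1071, assuming the data frame built in part (d) (the variable names below mirror that code):

library(e1071)
# svm() needs a factor response to do classification rather than regression
data$y <- as.factor(data$y)
svm.fit <- svm(y ~ x1 + x2, data = data, kernel = "linear", cost = 1)
svm.pred <- predict(svm.fit, data)
# plot the observations coloured by predicted class, as in parts (d) and (f)
plot(data$x1[svm.pred == "1"], data$x2[svm.pred == "1"], xlab = "X1", ylab = "X2")
points(data$x1[svm.pred == "0"], data$x2[svm.pred == "0"], col = "red")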

###7

library(ISLR2)
## Warning: package 'ISLR2' was built under R version 4.1.3
attach(Auto)

###a

library(ISLR2)
library(e1071)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.2 --
## v ggplot2 3.4.0     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.2.1     v stringr 1.4.0
## v readr   2.1.1     v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.3
## Warning: package 'tidyr' was built under R version 4.1.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(caret)
## Warning: package 'caret' was built under R version 4.1.3
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.1.3
set.seed(1)

data(Auto)
Auto <- as_tibble(Auto)
Auto <- Auto %>%
    mutate(highmpg = as.integer(mpg > median(mpg))) %>%
    mutate(highmpg = factor(highmpg),
           cylinders = factor(cylinders))

Auto %>%
    sample_n(5) %>%
    select(mpg, highmpg)
## # A tibble: 5 x 2
##     mpg highmpg
##   <dbl> <fct>  
## 1  44.3 1      
## 2  23   1      
## 3  26   1      
## 4  23.9 1      
## 5  23.2 1
Auto <- Auto %>%
    select(-mpg, -name)

# one-hot encode the factor predictors so the SVM sees a numeric matrix
dummy_trans <- dummyVars(highmpg ~ ., data = Auto)
Auto_dummy <- predict(dummy_trans, Auto)
## Warning in model.frame.default(Terms, newdata, na.action = na.action, xlev =
## object$lvls): variable 'highmpg' is not a factor

###b

svm_linear <- train(x = Auto_dummy, y = Auto$highmpg,
                    method = 'svmLinear2',
                    trControl = trainControl(method = 'cv', number = 10, allowParallel = TRUE),
                    preProcess = c('center', 'scale'),
                    tuneGrid = expand.grid(cost = seq(1, 20, by = 1)))
svm_linear$finalModel
## 
## Call:
## svm.default(x = as.matrix(x), y = y, kernel = "linear", cost = param$cost, 
##     probability = classProbs)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  2 
## 
## Number of Support Vectors:  75

There are 75 support vectors.
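
The count can also be read off the fitted object; assuming caret's `finalModel` is the underlying e1071::svm fit, as it is for method = 'svmLinear2':

# total number of support vectors in the final e1071 model
svm_linear$finalModel$tot.nSV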

###c

svm_poly <- train(x = Auto_dummy, y = Auto$highmpg,
                  method = 'svmPoly',
                  trControl = trainControl(method = 'cv', number = 10, allowParallel = TRUE),
                  preProcess = c('center', 'scale'),
                  tuneGrid = expand.grid(degree = seq(1, 8, by = 1),
                                         C = seq(1, 5, by = 1),
                                         scale = TRUE))
svm_poly$finalModel
## Support Vector Machine object of class "ksvm" 
## 
## SV type: C-svc  (classification) 
##  parameter : cost C = 1 
## 
## Polynomial kernel function. 
##  Hyperparameters : degree =  2  scale =  TRUE  offset =  1 
## 
## Number of Support Vectors : 71 
## 
## Objective Function Value : -45.587 
## Training error : 0.045918
svm_radial <- train(x = Auto_dummy, y = Auto$highmpg,
                    method = 'svmRadial',
                    trControl = trainControl(method = 'cv', number = 10, allowParallel = TRUE),
                    preProcess = c('center', 'scale'),
                    tuneGrid = expand.grid(C = seq(0.001, 3, length.out = 10),
                                           sigma = seq(0.2, 2, length.out = 5)))
svm_radial$finalModel
## Support Vector Machine object of class "ksvm" 
## 
## SV type: C-svc  (classification) 
##  parameter : cost C = 1.00066666666667 
## 
## Gaussian Radial Basis kernel function. 
##  Hyperparameter : sigma =  1.55 
## 
## Number of Support Vectors : 230 
## 
## Objective Function Value : -73.7206 
## Training error : 0.02551

###d

plot(svm_linear)

plot(svm_poly)

plot(svm_radial)

postResample(predict(svm_linear), Auto$highmpg)
##  Accuracy     Kappa 
## 0.9285714 0.8571429
postResample(predict(svm_poly), Auto$highmpg)
##  Accuracy     Kappa 
## 0.9540816 0.9081633
postResample(predict(svm_radial), Auto$highmpg)
##  Accuracy     Kappa 
## 0.9744898 0.9489796

On the training data the radial kernel fits best (97.4% accuracy), followed by the polynomial and linear kernels; note these are resubstitution estimates, so they will be optimistic.
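
A fairer comparison uses the cross-validation resamples caret already collected; a sketch using caret::resamples, strictly valid only if the three train() calls share the same resampling indices (e.g., via a common trainControl):

# compare cross-validated accuracy across the three kernels
resamps <- resamples(list(linear = svm_linear,
                          poly = svm_poly,
                          radial = svm_radial))
summary(resamps)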

###8

###a

require(ISLR2); require(tidyverse); require(ggthemes)
require(caret); require(e1071)
set.seed(1)

data('OJ')

inTrain <- sample(nrow(OJ), 800, replace = FALSE)

training <- OJ[inTrain,]
testing <- OJ[-inTrain,]

###b

svm_linear <- svm(Purchase ~ ., data = training,
                  kernel = 'linear',
                  cost = 0.01)
summary(svm_linear)
## 
## Call:
## svm(formula = Purchase ~ ., data = training, kernel = "linear", cost = 0.01)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  0.01 
## 
## Number of Support Vectors:  435
## 
##  ( 219 216 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM

###c

postResample(predict(svm_linear, training), training$Purchase)
##  Accuracy     Kappa 
## 0.8250000 0.6313971
postResample(predict(svm_linear, testing), testing$Purchase)
##  Accuracy     Kappa 
## 0.8222222 0.6082699

###d

svm_linear_tune <- train(Purchase ~ ., data = training,
                         method = 'svmLinear2',
                         trControl = trainControl(method = 'cv', number = 10),
                         preProcess = c('center', 'scale'),
                         tuneGrid = expand.grid(cost = seq(0.01, 10, length.out = 20)))
svm_linear_tune
## Support Vector Machines with Linear Kernel 
## 
## 800 samples
##  17 predictor
##   2 classes: 'CH', 'MM' 
## 
## Pre-processing: centered (17), scaled (17) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 721, 720, 720, 720, 721, 719, ... 
## Resampling results across tuning parameters:
## 
##   cost        Accuracy   Kappa    
##    0.0100000  0.8199215  0.6202565
##    0.5357895  0.8273760  0.6360834
##    1.0615789  0.8236101  0.6284665
##    1.5873684  0.8261105  0.6333280
##    2.1131579  0.8261105  0.6333280
##    2.6389474  0.8273605  0.6362121
##    3.1647368  0.8261105  0.6338114
##    3.6905263  0.8248605  0.6309732
##    4.2163158  0.8248605  0.6309732
##    4.7421053  0.8261105  0.6338114
##    5.2678947  0.8273605  0.6361662
##    5.7936842  0.8273605  0.6361662
##    6.3194737  0.8260947  0.6331693
##    6.8452632  0.8260947  0.6331693
##    7.3710526  0.8260947  0.6331693
##    7.8968421  0.8273605  0.6361662
##    8.4226316  0.8273605  0.6361662
##    8.9484211  0.8273605  0.6361662
##    9.4742105  0.8248447  0.6308145
##   10.0000000  0.8248447  0.6308145
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was cost = 0.5357895.

###e

postResample(predict(svm_linear_tune, training), training$Purchase)
##  Accuracy     Kappa 
## 0.8350000 0.6524601
postResample(predict(svm_linear_tune, testing), testing$Purchase)
##  Accuracy     Kappa 
## 0.8444444 0.6585983

###f

# e1071::svm() selects the kernel via the kernel argument (not method)
svm_radial <- svm(Purchase ~ ., data = training,
                  kernel = 'radial',
                  cost = 0.01)
summary(svm_radial)
## 
## Call:
## svm(formula = Purchase ~ ., data = training, kernel = "radial", cost = 0.01)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  0.01 
## 
## Number of Support Vectors:  634
## 
##  ( 319 315 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM
postResample(predict(svm_radial, training), training$Purchase)
## Accuracy    Kappa 
##  0.60625  0.00000
postResample(predict(svm_radial, testing), testing$Purchase)
##  Accuracy     Kappa 
## 0.6222222 0.0000000

With cost = 0.01 the radial fit is essentially degenerate: it predicts the majority class (CH) for every observation, which is why kappa is 0. Tuning the cost fixes this.

svm_radial_tune <- train(Purchase ~ ., data = training,
                         method = 'svmRadial',
                         trControl = trainControl(method = 'cv', number = 10),
                         preProcess = c('center', 'scale'),
                         tuneGrid = expand.grid(C = seq(0.01, 10, length.out = 20),
                                                sigma = 0.05691))
svm_radial_tune
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 800 samples
##  17 predictor
##   2 classes: 'CH', 'MM' 
## 
## Pre-processing: centered (17), scaled (17) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 720, 719, 719, 721, 719, 720, ... 
## Resampling results across tuning parameters:
## 
##   C           Accuracy   Kappa    
##    0.0100000  0.6062600  0.0000000
##    0.5357895  0.8274369  0.6315267
##    1.0615789  0.8249527  0.6267051
##    1.5873684  0.8199986  0.6165675
##    2.1131579  0.8174982  0.6105624
##    2.6389474  0.8149824  0.6041027
##    3.1647368  0.8112166  0.5964807
##    3.6905263  0.8112166  0.5964807
##    4.2163158  0.8124512  0.5993391
##    4.7421053  0.8137170  0.6021336
##    5.2678947  0.8137174  0.6017074
##    5.7936842  0.8137174  0.6017074
##    6.3194737  0.8124828  0.5988491
##    6.8452632  0.8124828  0.5988491
##    7.3710526  0.8137641  0.6020343
##    7.8968421  0.8112324  0.5967764
##    8.4226316  0.8112324  0.5967764
##    8.9484211  0.8099666  0.5939493
##    9.4742105  0.8124982  0.5992398
##   10.0000000  0.8124982  0.5992398
## 
## Tuning parameter 'sigma' was held constant at a value of 0.05691
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.05691 and C = 0.5357895.
postResample(predict(svm_radial_tune, training), training$Purchase)
## Accuracy    Kappa 
## 0.851250 0.684392
postResample(predict(svm_radial_tune, testing), testing$Purchase)
##  Accuracy     Kappa 
## 0.8185185 0.6040582

###g

svm_poly <- svm(Purchase ~ ., data = training,
                kernel = 'polynomial', degree = 2,
                cost = 0.01)
summary(svm_poly)
## 
## Call:
## svm(formula = Purchase ~ ., data = training, kernel = "polynomial", 
##     degree = 2, cost = 0.01)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  polynomial 
##        cost:  0.01 
## 
## Number of Support Vectors:  634
## 
##  ( 319 315 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM
postResample(predict(svm_poly, training), training$Purchase)
## Accuracy    Kappa 
##  0.60625  0.00000
postResample(predict(svm_poly, testing), testing$Purchase)
##  Accuracy     Kappa 
## 0.6222222 0.0000000
svm_poly_tune <- train(Purchase ~ ., data = training,
                       method = 'svmPoly',
                       trControl = trainControl(method = 'cv', number = 10),
                       preProcess = c('center', 'scale'),
                       tuneGrid = expand.grid(degree = 2,
                                              C = seq(0.01, 10, length.out = 20),
                                              scale = TRUE))
svm_poly_tune
## Support Vector Machines with Polynomial Kernel 
## 
## 800 samples
##  17 predictor
##   2 classes: 'CH', 'MM' 
## 
## Pre-processing: centered (17), scaled (17) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 721, 720, 719, 719, 720, 721, ... 
## Resampling results across tuning parameters:
## 
##   C           Accuracy   Kappa    
##    0.0100000  0.8249986  0.6233962
##    0.5357895  0.8224662  0.6237344
##    1.0615789  0.8224975  0.6238801
##    1.5873684  0.8162783  0.6107559
##    2.1131579  0.8199816  0.6194664
##    2.6389474  0.8187316  0.6172318
##    3.1647368  0.8187475  0.6166477
##    3.6905263  0.8137625  0.6061815
##    4.2163158  0.8137467  0.6062677
##    4.7421053  0.8162158  0.6109565
##    5.2678947  0.8174816  0.6133548
##    5.7936842  0.8162158  0.6109565
##    6.3194737  0.8149812  0.6080981
##    6.8452632  0.8149812  0.6080981
##    7.3710526  0.8174816  0.6138489
##    7.8968421  0.8187316  0.6162900
##    8.4226316  0.8174816  0.6139101
##    8.9484211  0.8174816  0.6139101
##    9.4742105  0.8174816  0.6139101
##   10.0000000  0.8174816  0.6139101
## 
## Tuning parameter 'degree' was held constant at a value of 2
## Tuning
##  parameter 'scale' was held constant at a value of TRUE
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were degree = 2, scale = TRUE and C = 0.01.
postResample(predict(svm_poly_tune, training), training$Purchase)
## Accuracy    Kappa 
## 0.850000 0.678295
postResample(predict(svm_poly_tune, testing), testing$Purchase)
##  Accuracy     Kappa 
## 0.8148148 0.5886654

Comparing test-set accuracy, the tuned linear kernel actually performs best here (0.844), ahead of the tuned radial (0.819) and polynomial (0.815) kernels, even though the radial and polynomial fits edge it out on the training data.
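
To make the comparison explicit, the test-set metrics can be collected into a single table; a sketch reusing the tuned models and the testing set from above:

# gather test-set accuracy and kappa for the three tuned kernels
kernel_results <- rbind(
    linear     = postResample(predict(svm_linear_tune, testing), testing$Purchase),
    radial     = postResample(predict(svm_radial_tune, testing), testing$Purchase),
    polynomial = postResample(predict(svm_poly_tune, testing), testing$Purchase))
kernel_results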