This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
### a
# Generate 500 observations with a non-linear (quadratic) class boundary
x1 <- runif(500) - 0.5
x2 <- runif(500) - 0.5
y <- 1 * (x1^2 - x2^2 > 0)
### b
# Plot the observations, colored by class label
plot(x1[y == 0], x2[y == 0], col = "red", xlab = "X1", ylab = "X2")
points(x1[y == 1], x2[y == 1])
### c
# Fit a logistic regression model using x1 and x2 as predictors
lm.fit <- glm(y ~ x1 + x2, family = binomial)
summary(lm.fit)
##
## Call:
## glm(formula = y ~ x1 + x2, family = binomial)
##
## Deviance Residuals:
##    Min      1Q  Median      3Q     Max
## -1.225  -1.217   1.132   1.137   1.143
##
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept)  0.096603   0.089862   1.075    0.282
## x1          -0.008582   0.297431  -0.029    0.977
## x2           0.024951   0.307442   0.081    0.935
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 691.99 on 499 degrees of freedom
## Residual deviance: 691.99 on 497 degrees of freedom
## AIC: 697.99
##
## Number of Fisher Scoring iterations: 3
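Neither x1 nor x2 is statistically significant (p-values of 0.977 and 0.935), and the residual deviance equals the null deviance: a logistic model with only linear terms cannot capture the quadratic boundary.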
### d
# Predict class labels on the training data and plot the predicted classes
data <- data.frame(x1 = x1, x2 = x2, y = y)
lm.prob <- predict(lm.fit, data, type = "response")
lm.pred <- ifelse(lm.prob > 0.50, 1, 0)
data.pos <- data[lm.pred == 1, ]
data.neg <- data[lm.pred == 0, ]
plot(data.pos$x1, data.pos$x2, xlab = "X1", ylab = "X2")
points(data.neg$x1, data.neg$x2, col = "red")
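The boundary between the predicted classes is linear, as expected for logistic regression with linear terms only.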
### e
# Fit a logistic regression with quadratic and interaction terms
lm.fit <- glm(y ~ poly(x1, 2) + poly(x2, 2) + I(x1 * x2), data = data, family = binomial)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
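These warnings are expected: since y was defined by the boundary x1^2 - x2^2 = 0, the quadratic terms separate the classes perfectly, so the maximum-likelihood estimates diverge and the fitted probabilities are driven to 0 or 1.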
### f
# Predict with the non-linear model and plot the predicted classes
lm.prob <- predict(lm.fit, data, type = "response")
lm.pred <- ifelse(lm.prob > 0.5, 1, 0)
data.pos <- data[lm.pred == 1, ]
data.neg <- data[lm.pred == 0, ]
plot(data.pos$x1, data.pos$x2, xlab = "X1", ylab = "X2")
points(data.neg$x1, data.neg$x2, col = "red")
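The predicted classes now follow a non-linear boundary that closely resembles the true quadratic boundary from part (b).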
### g
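No code was provided for part (g). A minimal sketch, assuming the task is to fit a support vector classifier with X1 and X2 as predictors and plot the predicted classes (the cost value is an arbitrary illustrative choice):

library(e1071)
# Fit a support vector classifier; the response must be a factor for classification
svm.fit <- svm(as.factor(y) ~ x1 + x2, data = data, kernel = "linear", cost = 1)
svm.pred <- predict(svm.fit, data)
# Plot predicted classes in the same style as parts (d) and (f)
plot(data$x1[svm.pred == 1], data$x2[svm.pred == 1], xlab = "X1", ylab = "X2")
points(data$x1[svm.pred == 0], data$x2[svm.pred == 0], col = "red")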
### 7
library(ISLR2)
## Warning: package 'ISLR2' was built under R version 4.1.3
attach(Auto)
### a
library(ISLR2)
library(e1071)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.2 --
## v ggplot2 3.4.0 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.2.1 v stringr 1.4.0
## v readr 2.1.1 v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.3
## Warning: package 'tidyr' was built under R version 4.1.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(caret)
## Warning: package 'caret' was built under R version 4.1.3
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.1.3
set.seed(1)
data(Auto)
Auto <- as_tibble(Auto)
# Create the binary response: 1 if mpg is above the median, 0 otherwise
Auto <- Auto %>%
  mutate(highmpg = as.integer(mpg > median(mpg))) %>%
  mutate(highmpg = factor(highmpg),
         cylinders = factor(cylinders))
Auto %>%
  sample_n(5) %>%
  select(mpg, highmpg)
## # A tibble: 5 x 2
## mpg highmpg
## <dbl> <fct>
## 1 44.3 1
## 2 23 1
## 3 26 1
## 4 23.9 1
## 5 23.2 1
# Drop the original response and the car name, then expand the factor
# predictor (cylinders) into dummy variables for caret's matrix interface
Auto <- Auto %>%
  select(-mpg, -name)
dummy_trans <- dummyVars(highmpg ~ ., data = Auto)
Auto_dummy <- predict(dummy_trans, Auto)
## Warning in model.frame.default(Terms, newdata, na.action = na.action, xlev =
## object$lvls): variable 'highmpg' is not a factor
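This warning can be ignored here: dummyVars() encodes only the predictors, and the factor response highmpg is supplied to train() separately as y.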
### b
svm_linear <- train(x = Auto_dummy, y = Auto$highmpg,
                    method = 'svmLinear2',
                    trControl = trainControl(method = 'cv', number = 10, allowParallel = TRUE),
                    preProcess = c('center', 'scale'),
                    tuneGrid = expand.grid(cost = seq(1, 20, by = 1)))
svm_linear$finalModel
##
## Call:
## svm.default(x = as.matrix(x), y = y, kernel = "linear", cost = param$cost,
## probability = classProbs)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 2
##
## Number of Support Vectors: 75
Cross-validation selected cost = 2, and the final model uses 75 support vectors.
### c
svm_poly <- train(x = Auto_dummy, y = Auto$highmpg,
                  method = 'svmPoly',
                  trControl = trainControl(method = 'cv', number = 10, allowParallel = TRUE),
                  preProcess = c('center', 'scale'),
                  tuneGrid = expand.grid(degree = seq(1, 8, by = 1),
                                         C = seq(1, 5, by = 1),
                                         scale = TRUE))
svm_poly$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 1
##
## Polynomial kernel function.
## Hyperparameters : degree = 2 scale = TRUE offset = 1
##
## Number of Support Vectors : 71
##
## Objective Function Value : -45.587
## Training error : 0.045918
svm_radial <- train(x = Auto_dummy, y = Auto$highmpg,
                    method = 'svmRadial',
                    trControl = trainControl(method = 'cv', number = 10, allowParallel = TRUE),
                    preProcess = c('center', 'scale'),
                    tuneGrid = expand.grid(C = seq(0.001, 3, length.out = 10),
                                           sigma = seq(0.2, 2, length.out = 5)))
svm_radial$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 1.00066666666667
##
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 1.55
##
## Number of Support Vectors : 230
##
## Objective Function Value : -73.7206
## Training error : 0.02551
### d
plot(svm_linear)
plot(svm_poly)
plot(svm_radial)
postResample(predict(svm_linear), Auto$highmpg)
## Accuracy Kappa
## 0.9285714 0.8571429
postResample(predict(svm_poly), Auto$highmpg)
## Accuracy Kappa
## 0.9540816 0.9081633
postResample(predict(svm_radial), Auto$highmpg)
## Accuracy Kappa
## 0.9744898 0.9489796
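On the training data the radial kernel scores best (97.4% accuracy), followed by the polynomial (95.4%) and linear (92.9%) kernels, though training-set accuracy is optimistic. A minimal sketch of a fairer comparison using the cross-validation resamples that caret already stores (the model names in the list are illustrative, and all three models are assumed to use the same 10-fold setup):

# Compare the tuned models on their CV resamples instead of training predictions
rs <- resamples(list(linear = svm_linear, poly = svm_poly, radial = svm_radial))
summary(rs)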
### 8
### a
library(ISLR2)
library(tidyverse)
library(ggthemes)
library(caret)
library(e1071)
set.seed(1)
data('OJ')
# Random sample of 800 observations for training; the remainder form the test set
inTrain <- sample(nrow(OJ), 800, replace = FALSE)
training <- OJ[inTrain, ]
testing <- OJ[-inTrain, ]
### b
svm_linear <- svm(Purchase ~ ., data = training,
                  kernel = 'linear',
                  cost = 0.01)
summary(svm_linear)
##
## Call:
## svm(formula = Purchase ~ ., data = training, kernel = "linear", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 435
##
## ( 219 216 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
### c
postResample(predict(svm_linear, training), training$Purchase)
## Accuracy Kappa
## 0.8250000 0.6313971
postResample(predict(svm_linear, testing), testing$Purchase)
## Accuracy Kappa
## 0.8222222 0.6082699
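Part (c) asks for error rates; since postResample() reports accuracy, the error rate is 1 - accuracy (about 17.5% on training, 17.8% on test). A minimal sketch computing them directly:

# Misclassification error rate = fraction of incorrect predictions
mean(predict(svm_linear, training) != training$Purchase) # training error, 0.175
mean(predict(svm_linear, testing) != testing$Purchase)   # test error, ~0.178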
### d
svm_linear_tune <- train(Purchase ~ ., data = training,
                         method = 'svmLinear2',
                         trControl = trainControl(method = 'cv', number = 10),
                         preProcess = c('center', 'scale'),
                         tuneGrid = expand.grid(cost = seq(0.01, 10, length.out = 20)))
svm_linear_tune
## Support Vector Machines with Linear Kernel
##
## 800 samples
## 17 predictor
## 2 classes: 'CH', 'MM'
##
## Pre-processing: centered (17), scaled (17)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 721, 720, 720, 720, 721, 719, ...
## Resampling results across tuning parameters:
##
## cost Accuracy Kappa
## 0.0100000 0.8199215 0.6202565
## 0.5357895 0.8273760 0.6360834
## 1.0615789 0.8236101 0.6284665
## 1.5873684 0.8261105 0.6333280
## 2.1131579 0.8261105 0.6333280
## 2.6389474 0.8273605 0.6362121
## 3.1647368 0.8261105 0.6338114
## 3.6905263 0.8248605 0.6309732
## 4.2163158 0.8248605 0.6309732
## 4.7421053 0.8261105 0.6338114
## 5.2678947 0.8273605 0.6361662
## 5.7936842 0.8273605 0.6361662
## 6.3194737 0.8260947 0.6331693
## 6.8452632 0.8260947 0.6331693
## 7.3710526 0.8260947 0.6331693
## 7.8968421 0.8273605 0.6361662
## 8.4226316 0.8273605 0.6361662
## 8.9484211 0.8273605 0.6361662
## 9.4742105 0.8248447 0.6308145
## 10.0000000 0.8248447 0.6308145
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was cost = 0.5357895.
### e
postResample(predict(svm_linear_tune, training), training$Purchase)
## Accuracy Kappa
## 0.8350000 0.6524601
postResample(predict(svm_linear_tune, testing), testing$Purchase)
## Accuracy Kappa
## 0.8444444 0.6585983
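Tuning the cost parameter (selected value roughly 0.54) raises test accuracy from 82.2% to 84.4%.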
### f
svm_radial <- svm(Purchase ~ ., data = training,
                  kernel = 'radial',
                  cost = 0.01)
summary(svm_radial)
##
## Call:
## svm(formula = Purchase ~ ., data = training, kernel = "radial", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 0.01
##
## Number of Support Vectors: 634
##
## ( 319 315 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
postResample(predict(svm_radial, training), training$Purchase)
## Accuracy Kappa
## 0.60625 0.00000
postResample(predict(svm_radial, testing), testing$Purchase)
## Accuracy Kappa
## 0.6222222 0.0000000
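With cost = 0.01 the radial SVM is too constrained: Kappa = 0 means it predicts the majority class (CH) for every observation, so the 60.6% accuracy is just the proportion of CH in the training set.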
svm_radial_tune <- train(Purchase ~ ., data = training,
                         method = 'svmRadial',
                         trControl = trainControl(method = 'cv', number = 10),
                         preProcess = c('center', 'scale'),
                         tuneGrid = expand.grid(C = seq(0.01, 10, length.out = 20),
                                                sigma = 0.05691))
svm_radial_tune
## Support Vector Machines with Radial Basis Function Kernel
##
## 800 samples
## 17 predictor
## 2 classes: 'CH', 'MM'
##
## Pre-processing: centered (17), scaled (17)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 720, 719, 719, 721, 719, 720, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.0100000 0.6062600 0.0000000
## 0.5357895 0.8274369 0.6315267
## 1.0615789 0.8249527 0.6267051
## 1.5873684 0.8199986 0.6165675
## 2.1131579 0.8174982 0.6105624
## 2.6389474 0.8149824 0.6041027
## 3.1647368 0.8112166 0.5964807
## 3.6905263 0.8112166 0.5964807
## 4.2163158 0.8124512 0.5993391
## 4.7421053 0.8137170 0.6021336
## 5.2678947 0.8137174 0.6017074
## 5.7936842 0.8137174 0.6017074
## 6.3194737 0.8124828 0.5988491
## 6.8452632 0.8124828 0.5988491
## 7.3710526 0.8137641 0.6020343
## 7.8968421 0.8112324 0.5967764
## 8.4226316 0.8112324 0.5967764
## 8.9484211 0.8099666 0.5939493
## 9.4742105 0.8124982 0.5992398
## 10.0000000 0.8124982 0.5992398
##
## Tuning parameter 'sigma' was held constant at a value of 0.05691
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.05691 and C = 0.5357895.
postResample(predict(svm_radial_tune, training), training$Purchase)
## Accuracy Kappa
## 0.851250 0.684392
postResample(predict(svm_radial_tune, testing), testing$Purchase)
## Accuracy Kappa
## 0.8185185 0.6040582
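Tuning helps the radial kernel substantially (85.1% training and 81.9% test accuracy), though it still trails the tuned linear model on the test set.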
### g
svm_poly <- svm(Purchase ~ ., data = training,
                kernel = 'polynomial', degree = 2,
                cost = 0.01)
summary(svm_poly)
##
## Call:
## svm(formula = Purchase ~ ., data = training, method = "polynomial",
## degree = 2, cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 0.01
##
## Number of Support Vectors: 634
##
## ( 319 315 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
postResample(predict(svm_poly, training), training$Purchase)
## Accuracy Kappa
## 0.60625 0.00000
postResample(predict(svm_poly, testing), testing$Purchase)
## Accuracy Kappa
## 0.6222222 0.0000000
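Once again, cost = 0.01 yields a degenerate fit (Kappa = 0) that predicts only the majority class.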
svm_poly_tune <- train(Purchase ~ ., data = training,
                       method = 'svmPoly',
                       trControl = trainControl(method = 'cv', number = 10),
                       preProcess = c('center', 'scale'),
                       tuneGrid = expand.grid(degree = 2,
                                              C = seq(0.01, 10, length.out = 20),
                                              scale = TRUE))
svm_poly_tune
## Support Vector Machines with Polynomial Kernel
##
## 800 samples
## 17 predictor
## 2 classes: 'CH', 'MM'
##
## Pre-processing: centered (17), scaled (17)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 721, 720, 719, 719, 720, 721, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.0100000 0.8249986 0.6233962
## 0.5357895 0.8224662 0.6237344
## 1.0615789 0.8224975 0.6238801
## 1.5873684 0.8162783 0.6107559
## 2.1131579 0.8199816 0.6194664
## 2.6389474 0.8187316 0.6172318
## 3.1647368 0.8187475 0.6166477
## 3.6905263 0.8137625 0.6061815
## 4.2163158 0.8137467 0.6062677
## 4.7421053 0.8162158 0.6109565
## 5.2678947 0.8174816 0.6133548
## 5.7936842 0.8162158 0.6109565
## 6.3194737 0.8149812 0.6080981
## 6.8452632 0.8149812 0.6080981
## 7.3710526 0.8174816 0.6138489
## 7.8968421 0.8187316 0.6162900
## 8.4226316 0.8174816 0.6139101
## 8.9484211 0.8174816 0.6139101
## 9.4742105 0.8174816 0.6139101
## 10.0000000 0.8174816 0.6139101
##
## Tuning parameter 'degree' was held constant at a value of 2
## Tuning parameter 'scale' was held constant at a value of TRUE
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were degree = 2, scale = TRUE and C = 0.01.
postResample(predict(svm_poly_tune, training), training$Purchase)
## Accuracy Kappa
## 0.850000 0.678295
postResample(predict(svm_poly_tune, testing), testing$Purchase)
## Accuracy Kappa
## 0.8148148 0.5886654
Overall, the tuned linear kernel gives the best results on this data: it achieves the highest test accuracy (84.4%) and Kappa (0.66), while the tuned radial and polynomial kernels fit the training set slightly better but generalize slightly worse (81.9% and 81.5% test accuracy).