ANSWER 5a: The data set is generated below, with class label y = 1 when x1^2 - x2^2 > 0 and y = 0 otherwise, giving a non-linear (quadratic) class boundary:
require(ISLR)
## Loading required package: ISLR
require(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
require(caret)
## Loading required package: caret
## Loading required package: lattice
## Loading required package: ggplot2
require(ggplot2)
require(tidyr)
## Loading required package: tidyr
set.seed(421)
x1 = runif(500) - 0.5
x2 = runif(500) - 0.5
y = 1 * (x1^2 - x2^2 > 0)
ANSWER 5b: The observations are plotted below, coloured by their class labels:
require(ISLR)
require(dplyr)
require(caret)
require(ggplot2)
require(tidyr)
plot(x1[y == 0], x2[y == 0], col = "orange", xlab = "X1", ylab = "X2", pch = "+")
points(x1[y == 1], x2[y == 1], col = "blue", pch = 4)
ANSWER 5c: A logistic regression model using X1 and X2 as linear predictors is fitted below. Neither predictor is statistically significant, which is expected since the true boundary is non-linear:
require(ISLR)
require(dplyr)
require(caret)
require(ggplot2)
lm_fit = glm(y ~ x1 + x2, family = binomial)
summary(lm_fit)
##
## Call:
## glm(formula = y ~ x1 + x2, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.278 -1.227 1.089 1.135 1.175
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.11999 0.08971 1.338 0.181
## x1 -0.16881 0.30854 -0.547 0.584
## x2 -0.08198 0.31476 -0.260 0.795
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 691.35 on 499 degrees of freedom
## Residual deviance: 690.99 on 497 degrees of freedom
## AIC: 696.99
##
## Number of Fisher Scoring iterations: 3
require(tidyr)
ANSWER 5d: The model is applied to the training data and the observations are plotted below according to their predicted class labels. The resulting decision boundary is linear:
require(ISLR)
require(dplyr)
require(caret)
require(ggplot2)
require(tidyr)
data = data.frame(x1 = x1, x2 = x2, y = y)
lm_prob = predict(lm_fit, data, type = "response")
lm_pred = ifelse(lm_prob > 0.52, 1, 0)
data_pos = data[lm_pred == 1, ]
data_neg = data[lm_pred == 0, ]
plot(data_pos$x1, data_pos$x2, col = "blue", xlab = "X1", ylab = "X2", pch = "+")
points(data_neg$x1, data_neg$x2, col = "orange", pch = 4)
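A cutoff of 0.52 (rather than 0.50) is used above because the fitted probabilities from this weak linear fit all sit very close to 0.5, so a 0.50 cutoff would put almost all observations in the same predicted class. A quick check of this (a minimal sketch using the lm_prob vector computed above):
# Sketch: how many fitted probabilities fall above each cutoff
table(lm_prob > 0.50)
table(lm_prob > 0.52)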
ANSWER 5e: A logistic regression model using non-linear functions of X1 and X2 (quadratic terms via poly() and an X1*X2 interaction) is fitted below:
require(ISLR)
require(dplyr)
require(caret)
require(ggplot2)
require(tidyr)
lm_fit = glm(y ~ poly(x1, 2) + poly(x2, 2) + I(x1 * x2), data = data, family = binomial)
summary(lm_fit)
##
## Call:
## glm(formula = y ~ poly(x1, 2) + poly(x2, 2) + I(x1 * x2), family = binomial,
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.003575 0.000000 0.000000 0.000000 0.003720
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 236.09 34920.61 0.007 0.995
## poly(x1, 2)1 3608.97 246381.97 0.015 0.988
## poly(x1, 2)2 88150.22 1333540.93 0.066 0.947
## poly(x2, 2)1 3256.75 177352.91 0.018 0.985
## poly(x2, 2)2 -87128.37 1164195.57 -0.075 0.940
## I(x1 * x2) -33.23 446735.64 0.000 1.000
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6.9135e+02 on 499 degrees of freedom
## Residual deviance: 3.3069e-05 on 494 degrees of freedom
## AIC: 12
##
## Number of Fisher Scoring iterations: 25
ANSWER 5f: The non-linear model is applied to the training data and the observations are plotted below according to their predicted class labels. The fitted decision boundary is clearly non-linear and, given the near-zero residual deviance above, essentially reproduces the true class labels:
require(ISLR)
require(dplyr)
require(caret)
require(ggplot2)
require(tidyr)
lm_prob = predict(lm_fit, data, type = "response")
lm_pred = ifelse(lm_prob > 0.5, 1, 0)
data_pos = data[lm_pred == 1, ]
data_neg = data[lm_pred == 0, ]
plot(data_pos$x1, data_pos$x2, col = "blue", xlab = "X1", ylab = "X2", pch = "+")
points(data_neg$x1, data_neg$x2, col = "orange", pch = 4)
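Since y was generated as 1 * (x1^2 - x2^2 > 0), the true decision boundary is the pair of lines x2 = x1 and x2 = -x1. A minimal sketch to overlay them (dashed) on the plot above for a visual comparison:
# Sketch: overlay the true boundary x1^2 = x2^2, i.e. the lines x2 = x1 and x2 = -x1
abline(a = 0, b = 1, lty = 2)
abline(a = 0, b = -1, lty = 2)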
ANSWER 5g: A support vector classifier (linear kernel) is fitted with X1 and X2 as predictors, and the observations are plotted below according to their predicted class labels:
require(ISLR)
require(dplyr)
require(caret)
require(ggplot2)
require(tidyr)
require(e1071)
## Loading required package: e1071
svm_fit = svm(as.factor(y) ~ x1 + x2, data, kernel = "linear", cost = 0.1)
svm_pred = predict(svm_fit, data)
data_pos = data[svm_pred == 1, ]
data_neg = data[svm_pred == 0, ]
plot(data_pos$x1, data_pos$x2, col = "blue", xlab = "X1", ylab = "X2", pch = "+")
points(data_neg$x1, data_neg$x2, col = "orange", pch = 4)
ANSWER 5h: An SVM with a non-linear (radial) kernel is fitted to the data, and the observations are plotted below according to their predicted class labels:
require(ISLR)
require(dplyr)
require(caret)
require(ggplot2)
require(tidyr)
svm_fit = svm(as.factor(y) ~ x1 + x2, data, gamma = 1)
svm_pred = predict(svm_fit, data)
data_pos = data[svm_pred == 1, ]
data_neg = data[svm_pred == 0, ]
plot(data_pos$x1, data_pos$x2, col = "blue", xlab = "X1", ylab = "X2", pch = "+")
points(data_neg$x1, data_neg$x2, col = "orange", pch = 4)
ANSWER 5i: As shown above, the SVM with a non-linear (radial) kernel and the logistic regression with quadratic and interaction terms both recover the non-linear decision boundary, whereas the linear logistic regression (parts c-d) and the linear support vector classifier (part g) produce only linear boundaries. The SVM has the added advantage of not requiring the non-linear terms to be specified by hand.
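A quick numerical check of this is a confusion table of the radial-kernel SVM predictions from part (h) against the true labels (a minimal sketch, assuming svm_pred and y from above are still in the workspace):
# Sketch: confusion table for the radial-kernel SVM from part (h)
table(predicted = svm_pred, actual = y)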
ANSWER 7a: A binary variable above_median is created below, equal to 1 when a car's gas mileage is at or above the median and 0 otherwise:
require(ISLR)
require(dplyr)
require(caret)
require(ggplot2)
require(tidyr)
Auto %>%
  as_tibble() %>%
  mutate(above_median = as.factor(ifelse(mpg >= median(mpg), 1, 0))) -> auto
ANSWER 7b: As shown below, the lowest cross-validation error (0.0103) occurs at cost = 1.
require(ISLR)
require(dplyr)
require(caret)
require(ggplot2)
require(tidyr)
set.seed(1)
auto %>%
  tune(svm, above_median ~ ., data = ., kernel = 'linear',
       ranges = list(cost = c(0.01, 0.1, 1, 10, 100))) -> auto_svc
summary(auto_svc)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.01025641
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-02 0.07653846 0.03617137
## 2 1e-01 0.04596154 0.03378238
## 3 1e+00 0.01025641 0.01792836
## 4 1e+01 0.02051282 0.02648194
## 5 1e+02 0.03076923 0.03151981
ANSWER 7c: As shown below, the radial kernel achieves its lowest cross-validation error (0.026) with gamma = 0.01 and cost = 10, while the polynomial kernel does best with degree = 2 and cost = 10. The polynomial kernel's best error (0.584), however, is far worse than that of either the radial or the linear kernel.
require(ISLR)
require(dplyr)
require(caret)
require(ggplot2)
require(tidyr)
set.seed(1)
auto %>%
  tune(svm, above_median ~ ., data = ., kernel = 'radial',
       ranges = list(gamma = c(0.01, 0.1, 1, 10, 100), cost = c(.01, .1, 1, 10))) -> auto_svm_radial
summary(auto_svm_radial)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## gamma cost
## 0.01 10
##
## - best performance: 0.02557692
##
## - Detailed performance results:
## gamma cost error dispersion
## 1 1e-02 0.01 0.55115385 0.04366593
## 2 1e-01 0.01 0.21711538 0.09865227
## 3 1e+00 0.01 0.55115385 0.04366593
## 4 1e+01 0.01 0.55115385 0.04366593
## 5 1e+02 0.01 0.55115385 0.04366593
## 6 1e-02 0.10 0.08929487 0.04382379
## 7 1e-01 0.10 0.07903846 0.03874545
## 8 1e+00 0.10 0.55115385 0.04366593
## 9 1e+01 0.10 0.55115385 0.04366593
## 10 1e+02 0.10 0.55115385 0.04366593
## 11 1e-02 1.00 0.07403846 0.03522110
## 12 1e-01 1.00 0.05371795 0.03525162
## 13 1e+00 1.00 0.06384615 0.04375618
## 14 1e+01 1.00 0.51794872 0.05063697
## 15 1e+02 1.00 0.55115385 0.04366593
## 16 1e-02 10.00 0.02557692 0.02093679
## 17 1e-01 10.00 0.03076923 0.03375798
## 18 1e+00 10.00 0.05884615 0.04020934
## 19 1e+01 10.00 0.51794872 0.04917316
## 20 1e+02 10.00 0.55115385 0.04366593
auto %>%
  tune(svm, above_median ~ ., data = ., kernel = 'polynomial',
       ranges = list(degree = seq(2, 5), cost = c(.01, .1, 1, 10))) -> auto_svm_poly
summary(auto_svm_poly)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## degree cost
## 2 10
##
## - best performance: 0.5841667
##
## - Detailed performance results:
## degree cost error dispersion
## 1 2 0.01 0.6019231 0.06346118
## 2 3 0.01 0.6019231 0.06346118
## 3 4 0.01 0.6019231 0.06346118
## 4 5 0.01 0.6019231 0.06346118
## 5 2 0.10 0.6019231 0.06346118
## 6 3 0.10 0.6019231 0.06346118
## 7 4 0.10 0.6019231 0.06346118
## 8 5 0.10 0.6019231 0.06346118
## 9 2 1.00 0.6019231 0.06346118
## 10 3 1.00 0.6019231 0.06346118
## 11 4 1.00 0.6019231 0.06346118
## 12 5 1.00 0.6019231 0.06346118
## 13 2 10.00 0.5841667 0.07806609
## 14 3 10.00 0.6019231 0.06346118
## 15 4 10.00 0.6019231 0.06346118
## 16 5 10.00 0.6019231 0.06346118
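For a side-by-side view, the best cross-validation error for each kernel can be pulled directly from the tune objects (a minimal sketch using the objects fitted above):
# Sketch: best 10-fold CV error for each kernel
c(linear = auto_svc$best.performance,
  radial = auto_svm_radial$best.performance,
  polynomial = auto_svm_poly$best.performance)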
ANSWER 7d: The tuned models are refitted below and the linear fit is plotted against pairs of predictors (acceleration, displacement and horsepower); see the sketch after the code for the corresponding plots of the polynomial and radial fits:
require(ISLR)
require(dplyr)
require(caret)
require(ggplot2)
require(tidyr)
require(stringr)
## Loading required package: stringr
svm_linr <- svm(above_median ~ ., data = auto, kernel = 'linear', cost = 1)
svm_poly <- svm(above_median ~ ., data = auto, kernel = 'polynomial', degree = 2, cost = 10)
svm_radl <- svm(above_median ~ ., data = auto, kernel = 'radial', gamma = 0.01, cost = 10)
plot_pairs <- function(fit, data, dependent, independents) {
  for (independent in independents) {
    formula = as.formula(str_c(dependent, '~', independent))
    plot(fit, data, formula)
  }
}
plot_pairs(svm_linr, auto, 'mpg', c('acceleration', 'displacement', 'horsepower'))
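The same helper can be reused for the polynomial and radial fits so the three decision boundaries can be compared over the same predictor pairs (a minimal sketch; it simply calls plot_pairs defined above):
# Sketch: repeat the pairwise plots for the polynomial and radial fits
plot_pairs(svm_poly, auto, 'mpg', c('acceleration', 'displacement', 'horsepower'))
plot_pairs(svm_radl, auto, 'mpg', c('acceleration', 'displacement', 'horsepower'))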
ANSWER 8a: The OJ data is partitioned into a training set (80% of the observations) and a test set (20%), stored in oj_train_samples, as shown below:
require(ISLR)
require(dplyr)
require(caret)
require(ggplot2)
require(tidyr)
require(modelr)
## Loading required package: modelr
set.seed(1)
oj_train_samples <-OJ %>% resample_partition(c(train = .8, test = .2))
summary(oj_train_samples)
## Length Class Mode
## train 2 resample list
## test 2 resample list
ANSWER 8b: As shown below, there are 450 support vectors: 226 belong to class CH and 224 to class MM.
require(ISLR)
require(dplyr)
require(caret)
require(ggplot2)
require(tidyr)
oj_linr_svc <- svm(Purchase ~ ., data = oj_train_samples$train, kernel = 'linear', cost = 0.01)
summary(oj_linr_svc)
##
## Call:
## svm(formula = Purchase ~ ., data = oj_train_samples$train, kernel = "linear",
## cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 450
##
## ( 226 224 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
ANSWER 8c: The training (0.157) and test (0.205) error rates are computed below:
require(ISLR)
require(dplyr)
require(caret)
require(ggplot2)
require(tidyr)
oj_train_samples$train %>% as_tibble() %>% mutate(Purchase_prime = predict(oj_linr_svc, newdata = .)) %>% summarize('Train Error Rate' = mean(Purchase != Purchase_prime))
## # A tibble: 1 x 1
## `Train Error Rate`
## <dbl>
## 1 0.157
oj_train_samples$test %>% as_tibble() %>% mutate(Purchase_prime = predict(oj_linr_svc, newdata = .)) %>% summarize('Test Error Rate' = mean(Purchase != Purchase_prime))
## # A tibble: 1 x 1
## `Test Error Rate`
## <dbl>
## 1 0.205
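Beyond the raw error rates, a confusion matrix on the test set shows how the misclassifications split across the two brands (a minimal sketch using the objects above; oj_test is just a temporary helper):
# Sketch: test-set confusion matrix for the linear SVC
oj_test <- as_tibble(oj_train_samples$test)
table(predicted = predict(oj_linr_svc, newdata = oj_test), actual = oj_test$Purchase)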
ANSWER 8d: Cross-validation over a grid of cost values (2^-8 to 2^4) selects cost = 0.015625 as the best parameter, as shown below:
require(ISLR)
require(dplyr)
require(caret)
require(ggplot2)
require(tidyr)
set.seed(1)
tune(svm, Purchase ~ ., data = as_tibble(oj_train_samples$train), kernel = 'linear', ranges = list(cost = 2^seq(-8, 4))) -> oj_svc_tune
summary(oj_svc_tune)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.015625
##
## - best performance: 0.1603557
##
## - Detailed performance results:
## cost error dispersion
## 1 0.00390625 0.1638577 0.03795702
## 2 0.00781250 0.1626813 0.03729402
## 3 0.01562500 0.1603557 0.03840728
## 4 0.03125000 0.1650205 0.03522022
## 5 0.06250000 0.1638988 0.03695773
## 6 0.12500000 0.1615458 0.03716560
## 7 0.25000000 0.1615595 0.03601742
## 8 0.50000000 0.1603967 0.03777655
## 9 1.00000000 0.1650479 0.03445874
## 10 2.00000000 0.1662244 0.03721419
## 11 4.00000000 0.1627086 0.03446296
## 12 8.00000000 0.1627223 0.03666065
## 13 16.00000000 0.1638851 0.03643699
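The cross-validation error is fairly flat across the whole cost grid (roughly 0.160 to 0.166); plotting the tuning object makes this easy to see (a minimal sketch; plot() has a method for tune objects in e1071):
# Sketch: CV error versus cost for the linear SVC
plot(oj_svc_tune)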
ANSWER 8e: Using the tuned cost, the training (0.152) and test (0.200) error rates are computed below:
require(ISLR)
require(dplyr)
require(caret)
require(ggplot2)
require(tidyr)
oj_linr_svc <- svm(Purchase ~ ., data = oj_train_samples$train, kernel = 'linear', cost = oj_svc_tune$best.parameters$cost)
oj_train_samples$train %>% as_tibble() %>% mutate(Purchase_prime = predict(oj_linr_svc)) %>% summarize('Train Error Rate' = mean(Purchase != Purchase_prime))
## # A tibble: 1 x 1
## `Train Error Rate`
## <dbl>
## 1 0.152
oj_train_samples$test %>% as_tibble() %>% mutate(Purchase_prime = predict(oj_linr_svc, newdata = .)) %>% summarize('Test Error Rate' = mean(Purchase != Purchase_prime))
## # A tibble: 1 x 1
## `Test Error Rate`
## <dbl>
## 1 0.2
ANSWER 8f: Parts (b) through (e) are repeated below using an SVM with a radial kernel:
require(ISLR)
require(dplyr)
require(caret)
require(ggplot2)
require(tidyr)
oj_radl_svc <- svm( Purchase ~ ., data = oj_train_samples$train, kernel = 'radial')
summary(oj_radl_svc)
##
## Call:
## svm(formula = Purchase ~ ., data = oj_train_samples$train, kernel = "radial")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 390
##
## ( 201 189 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
oj_train_samples$train %>% as_tibble() %>% mutate(Purchase_prime = predict(oj_radl_svc, newdata = .)) %>% summarize('Train Error Rate' = mean(Purchase != Purchase_prime))
## # A tibble: 1 x 1
## `Train Error Rate`
## <dbl>
## 1 0.145
oj_train_samples$test %>% as_tibble() %>% mutate(Purchase_prime = predict(oj_radl_svc, newdata = .)) %>% summarize('Test Error Rate' = mean(Purchase != Purchase_prime))
## # A tibble: 1 x 1
## `Test Error Rate`
## <dbl>
## 1 0.186
set.seed(1)
tune(svm, Purchase ~ ., data = as_tibble(oj_train_samples$train), kernel = 'radial', ranges = list(cost = 2^seq(-8, 4))) -> oj_radl_tune
summary(oj_radl_tune)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.1673461
##
## - Detailed performance results:
## cost error dispersion
## 1 0.00390625 0.3764843 0.06228157
## 2 0.00781250 0.3764843 0.06228157
## 3 0.01562500 0.3764843 0.06228157
## 4 0.03125000 0.3366211 0.07569987
## 5 0.06250000 0.1918605 0.04001821
## 6 0.12500000 0.1755267 0.03617482
## 7 0.25000000 0.1755404 0.03986210
## 8 0.50000000 0.1720246 0.04204614
## 9 1.00000000 0.1673461 0.03585901
## 10 2.00000000 0.1720520 0.04109274
## 11 4.00000000 0.1731874 0.04013555
## 12 8.00000000 0.1825581 0.03589686
## 13 16.00000000 0.1837346 0.03185391
oj_radl_svc <- svm(Purchase ~ ., data = oj_train_samples$train, kernel = 'radial', cost = oj_radl_tune$best.parameters$cost)
oj_train_samples$train %>% as_tibble() %>% mutate(Purchase_prime = predict(oj_radl_svc)) %>% summarize('Train Error Rate' = mean(Purchase != Purchase_prime))
## # A tibble: 1 x 1
## `Train Error Rate`
## <dbl>
## 1 0.152
oj_train_samples$test %>% as_tibble() %>% mutate(Purchase_prime = predict(oj_radl_svc, newdata = .)) %>% summarize('Test Error Rate' = mean(Purchase != Purchase_prime))
## # A tibble: 1 x 1
## `Test Error Rate`
## <dbl>
## 1 0.2
ANSWER 8g: Parts (b) through (e) are repeated below using an SVM with a polynomial kernel (degree = 2):
require(ISLR)
require(dplyr)
require(caret)
require(ggplot2)
require(tidyr)
oj_poly_svc <- svm(Purchase ~ ., data = oj_train_samples$train, kernel = 'polynomial', degree = 2)
summary(oj_poly_svc)
##
## Call:
## svm(formula = Purchase ~ ., data = oj_train_samples$train, kernel = "polynomial",
## degree = 2)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 1
## degree: 2
## coef.0: 0
##
## Number of Support Vectors: 467
##
## ( 238 229 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
oj_train_samples$train %>% as_tibble() %>% mutate(Purchase_prime = predict(oj_poly_svc, newdata = .)) %>% summarize('Train Error Rate' = mean(Purchase != Purchase_prime))
## # A tibble: 1 x 1
## `Train Error Rate`
## <dbl>
## 1 0.174
oj_train_samples$test %>% as_tibble() %>% mutate(Purchase_prime = predict(oj_poly_svc, newdata = .)) %>% summarize('Test Error Rate' = mean(Purchase != Purchase_prime))
## # A tibble: 1 x 1
## `Test Error Rate`
## <dbl>
## 1 0.214
set.seed(1)
tune(svm, Purchase ~ ., data = as_tibble(oj_train_samples$train), kernel = 'polynomial', degree = 2, ranges = list(cost = 2^seq(-8, 4))) -> oj_poly_tune
summary(oj_poly_tune)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 8
##
## - best performance: 0.1708208
##
## - Detailed performance results:
## cost error dispersion
## 1 0.00390625 0.3764843 0.06228157
## 2 0.00781250 0.3776471 0.06301831
## 3 0.01562500 0.3495075 0.07242409
## 4 0.03125000 0.3389740 0.06929994
## 5 0.06250000 0.3179617 0.06929113
## 6 0.12500000 0.3016005 0.07149584
## 7 0.25000000 0.2268673 0.03547776
## 8 0.50000000 0.2012312 0.04937234
## 9 1.00000000 0.1918194 0.04481021
## 10 2.00000000 0.1848290 0.04773197
## 11 4.00000000 0.1801505 0.04354956
## 12 8.00000000 0.1708208 0.04408733
## 13 16.00000000 0.1732011 0.04348912
oj_poly_svc <- svm(Purchase ~ ., data = oj_train_samples$train, kernel = 'polynomial', degree = 2, cost = oj_poly_tune$best.parameters$cost)
oj_train_samples$train %>% as_tibble() %>% mutate(Purchase_prime = predict(oj_poly_svc)) %>% summarize('Train Error Rate' = mean(Purchase != Purchase_prime))
## # A tibble: 1 x 1
## `Train Error Rate`
## <dbl>
## 1 0.137
oj_train_samples$test %>% as_tibble() %>% mutate(Purchase_prime = predict(oj_poly_svc, newdata = .)) %>% summarize('Test Error Rate' = mean(Purchase != Purchase_prime))
## # A tibble: 1 x 1
## `Test Error Rate`
## <dbl>
## 1 0.219
ANSWER 8h: Overall, the radial kernel appears to give the best results on this data: its test error rate (0.186) is lower than that of the linear kernel (about 0.20) and of the polynomial kernel (about 0.21-0.22).
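A compact way to restate the comparison is to recompute the test error for the three tuned fits in one place (a minimal sketch, assuming oj_linr_svc, oj_radl_svc and oj_poly_svc from the steps above are still in memory):
# Sketch: test error rate for each tuned kernel
oj_test <- as_tibble(oj_train_samples$test)
sapply(list(linear = oj_linr_svc, radial = oj_radl_svc, polynomial = oj_poly_svc),
       function(fit) mean(predict(fit, newdata = oj_test) != oj_test$Purchase))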