Chapter 09 (page 398): 5, 7, 8
libraries:
library(ggplot2)
library(e1071)
library(caret)
## Loading required package: lattice
library(ISLR)
library(knitr)
set.seed(1)
x1 <-runif(500)- 0.5
x2 <-runif(500)- 0.5
y <- 1 * (x1^2- x2^2 > 0)
plot <- ggplot(mapping = aes(x = x1, y = x2, color = factor(y))) +
geom_point()+
labs(title = "Two Classes with a Quadratic Decision Boundary", color = "Class")
plot
model1 <- glm(y ~ x1 + x2, family = binomial)
summary(model1)
##
## Call:
## glm(formula = y ~ x1 + x2, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.179 -1.139 -1.112 1.206 1.257
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.087260 0.089579 -0.974 0.330
## x1 0.196199 0.316864 0.619 0.536
## x2 -0.002854 0.305712 -0.009 0.993
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 692.18 on 499 degrees of freedom
## Residual deviance: 691.79 on 497 degrees of freedom
## AIC: 697.79
##
## Number of Fisher Scoring iterations: 3
preds <- predict(model1, type = "response")
class <- as.numeric(preds > 0.5)
ggplot(mapping = aes(x = x1, y = x2, color = factor(class)))+
geom_point() +
labs(title = "Observations of Predicted Class Labels using Logistic Regression", color = "Class")
x1sq <- x1^2
x2sq <- x2^2
x1multx2 <- x1*x2
logx2 <- log(abs(x2)+1)
model2 <- glm(y ~ x1 + x2 + x1sq + x2sq + x1multx2 + logx2, family = binomial)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(model2)
##
## Call:
## glm(formula = y ~ x1 + x2 + x1sq + x2sq + x1multx2 + logx2, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -7.158e-04 -2.000e-08 -2.000e-08 2.000e-08 8.576e-04
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -11.72 1034.50 -0.011 0.991
## x1 31.41 13994.69 0.002 0.998
## x2 -59.56 14371.79 -0.004 0.997
## x1sq 16134.97 503018.56 0.032 0.974
## x2sq -16191.36 501731.30 -0.032 0.974
## x1multx2 -204.40 39474.29 -0.005 0.996
## logx2 61.80 23014.10 0.003 0.998
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6.9218e+02 on 499 degrees of freedom
## Residual deviance: 3.3241e-06 on 493 degrees of freedom
## AIC: 14
##
## Number of Fisher Scoring iterations: 25
predsnonlin <- predict(model2, type = "response")
classnonlin <- as.numeric(predsnonlin > 0.5)
ggplot(mapping = aes(x = x1, y = x2, color = factor(classnonlin)))+
geom_point() +
labs(title = "Observations of Predicted Class Labels Using Non-Linear Logistic Regression", color = "Class")
svm1 <- svm(as.factor(y) ~ x1 + x2, kernel = "linear")
predssvm <- predict(svm1)
confusionMatrix(predssvm, as.factor(y))
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 261 239
## 1 0 0
##
## Accuracy : 0.522
## 95% CI : (0.4772, 0.5665)
## No Information Rate : 0.522
## P-Value [Acc > NIR] : 0.5181
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.000
## Specificity : 0.000
## Pos Pred Value : 0.522
## Neg Pred Value : NaN
## Prevalence : 0.522
## Detection Rate : 0.522
## Detection Prevalence : 1.000
## Balanced Accuracy : 0.500
##
## 'Positive' Class : 0
##
ggplot(mapping = aes(x = x1, y = x2, color = predssvm))+
geom_point()+
labs(title= "Observations of Predicted Class Labels Using Linear SVM", color = "Class")
svm2 <- svm(as.factor(y) ~ x1 + x2, kernel = "radial")
predssvm2 <- predict(svm2)
confusionMatrix(predssvm2, as.factor(y))
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 257 13
## 1 4 226
##
## Accuracy : 0.966
## 95% CI : (0.9461, 0.9801)
## No Information Rate : 0.522
## P-Value [Acc > NIR] : < 2e-16
##
## Kappa : 0.9318
##
## Mcnemar's Test P-Value : 0.05235
##
## Sensitivity : 0.9847
## Specificity : 0.9456
## Pos Pred Value : 0.9519
## Neg Pred Value : 0.9826
## Prevalence : 0.5220
## Detection Rate : 0.5140
## Detection Prevalence : 0.5400
## Balanced Accuracy : 0.9651
##
## 'Positive' Class : 0
##
ggplot(mapping = aes(x = x1, y = x2, color = predssvm2))+
geom_point()+
labs(title = "Observations of Predicted Class Labels Using Non-Linear SVM", color = "Class")
attach(Auto)
## The following object is masked from package:ggplot2:
##
## mpg
head(Auto)
## mpg cylinders displacement horsepower weight acceleration year origin
## 1 18 8 307 130 3504 12.0 70 1
## 2 15 8 350 165 3693 11.5 70 1
## 3 18 8 318 150 3436 11.0 70 1
## 4 16 8 304 150 3433 12.0 70 1
## 5 17 8 302 140 3449 10.5 70 1
## 6 15 8 429 198 4341 10.0 70 1
## name
## 1 chevrolet chevelle malibu
## 2 buick skylark 320
## 3 plymouth satellite
## 4 amc rebel sst
## 5 ford torino
## 6 ford galaxie 500
median_mpg <- median(Auto$mpg)
Auto$median_mileage <- ifelse(Auto$mpg > median_mpg, 1, 0)
head(Auto$median_mileage)
## [1] 0 0 0 0 0 0
Auto_data <- Auto[, -which(names(Auto) == "mpg")]
Auto_data$median_mileage <- Auto$median_mileage
set.seed(1)
svm_linear <- tune(svm,median_mileage~., data = Auto_data, kernel = "linear", ranges = list(cost = c(0.1, 1, 10, 100, 1000)))
result <- svm_linear$best.model
print(result)
##
## Call:
## best.tune(METHOD = svm, train.x = median_mileage ~ ., data = Auto_data,
## ranges = list(cost = c(0.1, 1, 10, 100, 1000)), kernel = "linear")
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: linear
## cost: 1
## gamma: 0.003215434
## epsilon: 0.1
##
##
## Number of Support Vectors: 300
cv_results <- svm_linear$performances
print(cv_results)
## cost error dispersion
## 1 1e-01 0.10227373 0.03634911
## 2 1e+00 0.09603609 0.03666741
## 3 1e+01 0.10531309 0.03683207
## 4 1e+02 0.12079079 0.03864160
## 5 1e+03 0.12724775 0.03878303
svm_radial <- tune(svm, median_mileage~. , data = Auto_data, kernel = "radial", ranges = list(cost = c(0.1, 1, 10, 100), gamma = c(0.01, 0.1, 1)))
svm_poly <- tune(svm,median_mileage~., data = Auto_data, kernel = "polynomial", ranges = list(cost = c(0.1, 1, 10), degree = c(2, 3, 4)))
radial_best = svm_radial$best.model
print(radial_best)
##
## Call:
## best.tune(METHOD = svm, train.x = median_mileage ~ ., data = Auto_data,
## ranges = list(cost = c(0.1, 1, 10, 100), gamma = c(0.01, 0.1,
## 1)), kernel = "radial")
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: radial
## cost: 10
## gamma: 0.1
## epsilon: 0.1
##
##
## Number of Support Vectors: 248
cv_results_radial <- svm_radial$performances
print(cv_results_radial)
## cost gamma error dispersion
## 1 0.1 0.01 0.09892313 0.03645072
## 2 1.0 0.01 0.08595536 0.04188922
## 3 10.0 0.01 0.08259942 0.04393380
## 4 100.0 0.01 0.07241060 0.03426497
## 5 0.1 0.10 0.07235584 0.03733974
## 6 1.0 0.10 0.07119458 0.03898281
## 7 10.0 0.10 0.06087625 0.03126506
## 8 100.0 0.10 0.08159685 0.03851486
## 9 0.1 1.00 0.31355898 0.04307895
## 10 1.0 1.00 0.09703152 0.01912304
## 11 10.0 1.00 0.10266191 0.02117649
## 12 100.0 1.00 0.10266216 0.02117675
poly_best <- svm_poly$best.model
print(poly_best)
##
## Call:
## best.tune(METHOD = svm, train.x = median_mileage ~ ., data = Auto_data,
## ranges = list(cost = c(0.1, 1, 10), degree = c(2, 3, 4)), kernel = "polynomial")
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: polynomial
## cost: 10
## degree: 2
## gamma: 0.003215434
## coef.0: 0
## epsilon: 0.1
##
##
## Number of Support Vectors: 392
cv_results_poly <- svm_poly$performances
print(cv_results_poly)
## cost degree error dispersion
## 1 0.1 2 0.5352436 0.07323902
## 2 1.0 2 0.5142741 0.07767385
## 3 10.0 2 0.3716700 0.09973637
## 4 0.1 3 0.5370129 0.07284751
## 5 1.0 3 0.5316792 0.07354908
## 6 10.0 3 0.4814037 0.08122631
## 7 0.1 4 0.5375866 0.07277105
## 8 1.0 4 0.5374704 0.07280785
## 9 10.0 4 0.5362994 0.07318721
set.seed(1)
Auto_data$median_mileage <- as.factor(Auto_data$median_mileage)
vars <- list(c("weight", "horsepower"),
c("displacement", "weight"),
c("acceleration", "cylinders"))
bestmodel_linear <- svm(median_mileage ~ ., data = Auto_data, kernel = "linear", cost = 1)
bestmodel_radial <- svm(median_mileage ~ ., data = Auto_data, kernel = "radial", cost = 10, gamma = 0.1)
bestmodel_poly <- svm(median_mileage ~ ., data = Auto_data, kernel = "polynomial", cost = 10, degree = 2, scale = 1)
## Warning in any(scale): coercing argument of type 'double' to logical
for (pair in vars) {
formula <- as.formula(paste(pair[1], "~", pair[2]))
plot(bestmodel_linear, Auto_data, formula, cex.lab = 1.2, cex.axis = 1.1)
mtext(paste("Linear SVM:", pair[1], "~", pair[2]),
side = 3, line = .5, adj = 0.3, cex = 1)
plot(bestmodel_radial, Auto_data, formula, cex.lab = 1.2, cex.axis = 1.1)
mtext(paste("Radial SVM:", pair[1], "~", pair[2]),
side = 3, line = .5, adj = 0.3, cex = 1)
plot(bestmodel_poly, Auto_data, formula, cex.lab = 1.2, cex.axis = 1.1)
mtext(paste("Poly SVM:", pair[1], "~", pair[2]),
side = 3, line = .5, adj = 0.3, cex = 1)
par(mfrow = c(1, 1))
}
attach(OJ)
set.seed(1)
sample <- sample(nrow(OJ), 800)
train <- OJ[sample,]
test <- OJ[-sample,]
svm_purch <- svm(Purchase~., data = train, kernel = 'linear', cost = 0.01)
summary(svm_purch)
##
## Call:
## svm(formula = Purchase ~ ., data = train, kernel = "linear", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 435
##
## ( 219 216 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
train_preds <- predict(svm_purch, train)
test_preds <- predict(svm_purch, test)
train_error <- mean(train_preds != train$Purchase)
test_error <- mean(test_preds != test$Purchase)
train_error
## [1] 0.175
test_error
## [1] 0.1777778
set.seed(1)
tune_purch <- tune(svm, Purchase ~., data = train, kernel = "linear", ranges = list(cost = c(0.01, 0.1, 1, 10)))
summary(tune_purch)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.1
##
## - best performance: 0.1725
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.17625 0.02853482
## 2 0.10 0.17250 0.03162278
## 3 1.00 0.17500 0.02946278
## 4 10.00 0.17375 0.03197764
svm_new <- svm(Purchase ~., kernel = "linear", data = train, cost = tune_purch$best.parameters$cost)
train_preds1 <- predict(svm_new, train)
test_preds1 <- predict(svm_new, test)
train_error1 <- mean(train_preds1 != train$Purchase)
test_error1 <- mean(test_preds1!= test$Purchase)
train_error1
## [1] 0.165
test_error1
## [1] 0.162963
set.seed(1)
svm_rad_new <- svm(Purchase~., data = train, kernel = "radial", cost = 0.01)
summary(svm_rad_new)
##
## Call:
## svm(formula = Purchase ~ ., data = train, kernel = "radial", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 0.01
##
## Number of Support Vectors: 634
##
## ( 319 315 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
train_preds2 <- predict(svm_rad_new, train)
test_preds2 <- predict(svm_rad_new, test)
train_error2 <- mean(train_preds2 != train$Purchase)
test_error2 <- mean(test_preds2 != test$Purchase)
train_error2
## [1] 0.39375
test_error2
## [1] 0.3777778
set.seed(1)
tune_purch2 <- tune(svm, Purchase ~., data = train, kernel = "radial", ranges = list(cost = c(0.01, 0.1, 1, 10)))
summary(tune_purch2)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.17125
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.39375 0.04007372
## 2 0.10 0.18625 0.02853482
## 3 1.00 0.17125 0.02128673
## 4 10.00 0.18625 0.02853482
svm_new3 <- svm(Purchase ~., kernel = "radial", data = train, cost = tune_purch2$best.parameters$cost)
train_preds3 <- predict(svm_new3, train)
test_preds3 <- predict(svm_new3, test)
train_error3 <- mean(train_preds3 != train$Purchase)
test_error3 <- mean(test_preds3!= test$Purchase)
train_error3
## [1] 0.15125
test_error3
## [1] 0.1851852
svm_poly_new <- svm(Purchase ~., kernel = "polynomial", data = train, degree = 2, cost = .01)
summary(svm_poly_new)
##
## Call:
## svm(formula = Purchase ~ ., data = train, kernel = "polynomial",
## degree = 2, cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 0.01
## degree: 2
## coef.0: 0
##
## Number of Support Vectors: 636
##
## ( 321 315 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
train_preds4 <- predict(svm_poly_new, train)
test_preds4 <- predict(svm_poly_new, test)
train_error4 <- mean(train_preds4 != train$Purchase)
test_error4 <- mean(test_preds4 != test$Purchase)
train_error4
## [1] 0.3725
test_error4
## [1] 0.3666667
set.seed(1)
tune_purch3 <- tune(svm, Purchase ~., data = train, kernel = "polynomial", ranges = list(cost = c(0.01, 0.1, 1, 10)))
summary(tune_purch3)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.185
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.37125 0.03537988
## 2 0.10 0.28750 0.05068969
## 3 1.00 0.18500 0.02415229
## 4 10.00 0.19500 0.03184162
svm_new5 <- svm(Purchase ~., kernel = "polynomial", data = train, cost = tune_purch3$best.parameters$cost)
train_preds5 <- predict(svm_new5, train)
test_preds5 <- predict(svm_new5, test)
train_error5 <- mean(train_preds5 != train$Purchase)
test_error5 <- mean(test_preds5!= test$Purchase)
train_error5
## [1] 0.15375
test_error5
## [1] 0.2222222
svm_results <- data.frame(
Kernel = c("Linear", "Radial", "Polynomial (deg = 2)"),
Optimal_Cost = c("~0.10", "~1.0", "~1.0"),
Training_Error = c("16.5%", "15.13%", "15.38%"),
Test_Error = c("16.3%", "18.52%", "22.22%")
)
kable(svm_results, caption = "SVM Performance Summary for OJ Dataset")
Kernel | Optimal_Cost | Training_Error | Test_Error |
---|---|---|---|
Linear | ~0.10 | 16.5% | 16.3% |
Radial | ~1.0 | 15.13% | 18.52% |
Polynomial (deg = 2) | ~1.0 | 15.38% | 22.22% |