library(e1071) # svm() and tune() below come from e1071
set.seed(1)
x <- rnorm(100)
y <- 2 + x^2 + 4 + rnorm(100)
class <- sample(100, 50)
y[class] <- y[class] + 3
y[-class] <- y[-class] - 3
plot(x, y)
# Creating a binary class variable for the classification model
threshold <- 5
z <- ifelse(y > threshold, 1, 0)
# Create data frame, converting class variable to factor
dat <- data.frame(x = x, y = y, z = as.factor(z))
# Splitting data
indices <- sample(nrow(dat), 0.8 * nrow(dat))
train_data <- dat[indices, ]
test_data <- dat[-indices, ]
# Train SVM model
svmfit <- svm(z ~ ., data = train_data, kernel = "linear", cost = 10)
# Plot SVM decision boundary using training data
plot(svmfit, data = train_data)
zpred <- predict(svmfit, test_data)
table(predict = zpred, truth = test_data$z)
## truth
## predict 0 1
## 0 8 0
## 1 0 12
Not bad! The linear kernel classifies every test observation correctly.
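The same conclusion can be read off programmatically; a one-liner sketch using the objects above:
# Fraction of test observations misclassified by the linear SVM
mean(zpred != test_data$z)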
svmfit1 <- svm(z ~ ., data = train_data, kernel = "radial", gamma = 1, cost = 10)
plot(svmfit1, train_data)
zpred1 <- predict(svmfit1, test_data)
table(predict = zpred1, truth = test_data$z)
## truth
## predict 0 1
## 0 8 1
## 1 0 11
svmfit2 <- svm(z ~ ., data = train_data, kernel = "polynomial", scale = FALSE)
plot(svmfit2, train_data)
zpred2 <- predict(svmfit2, test_data)
table(predict = zpred2, truth = test_data$z)
## truth
## predict 0 1
## 0 8 0
## 1 0 12
In this context, it appears the linear and polynomial kernels classify the test set perfectly, while the radial kernel misclassifies a single observation (a 1 predicted as a 0).
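With only 20 test observations, a single split is a noisy way to compare kernels. A rough alternative (a sketch using e1071's tune(); the cost grid here is arbitrary) is to cross-validate each kernel on the full data:
# 10-fold CV error for each kernel, taking the best cost from a small grid
set.seed(1)
sapply(c("linear", "radial", "polynomial"), function(k)
  tune(svm, z ~ ., data = dat, kernel = k,
       ranges = list(cost = c(0.1, 1, 10)))$best.performance)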
set.seed(1)
x1 <- runif(500) - 0.5
x2 <- runif(500) - 0.5
y <- 1 * (x1^2 - x2^2 > 0)
data <- data.frame(x1 = x1, x2 = x2, y = as.factor(y))
# Plot the observations, colored according to their class labels
plot(data$x1, data$x2, col = as.numeric(data$y), pch = 16, xlab = "X1", ylab = "X2", main = "Observations Colored by Class Labels")
legend("topright", legend = levels(data$y), col = 1:length(levels(data$y)), pch = 16, title = "Class Labels")
(c) Fit a logistic regression model to the data, using X1 and X2 as predictors.
log_mod <- glm(y ~ x1 + x2, family = "binomial")
summary(log_mod)
##
## Call:
## glm(formula = y ~ x1 + x2, family = "binomial")
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.087260 0.089579 -0.974 0.330
## x1 0.196199 0.316864 0.619 0.536
## x2 -0.002854 0.305712 -0.009 0.993
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 692.18 on 499 degrees of freedom
## Residual deviance: 691.79 on 497 degrees of freedom
## AIC: 697.79
##
## Number of Fisher Scoring iterations: 3
log_pred <- predict(log_mod, newdata = data, type = "response")
preds <- ifelse(log_pred > 0.5, 1, 0)
class_colors <- c("blue", "red")
plot(data$x1, data$x2, col = class_colors[preds + 1], pch = 16,
xlab = "X1", ylab = "X2",
main = "Observations Colored by Predicted Class Labels")
log_mod2 <- glm(y ~ log(x1) + poly(x2, 2), data = data, family = "binomial")
## Warning in log(x1): NaNs produced
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(log_mod2)
##
## Call:
## glm(formula = y ~ log(x1) + poly(x2, 2), family = "binomial",
## data = data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 9.8383 1.7773 5.535 3.11e-08 ***
## log(x1) 6.7296 1.2040 5.589 2.28e-08 ***
## poly(x2, 2)1 0.2358 6.8488 0.034 0.973
## poly(x2, 2)2 -97.5814 16.6211 -5.871 4.33e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 318.830 on 229 degrees of freedom
## Residual deviance: 69.063 on 226 degrees of freedom
## (270 observations deleted due to missingness)
## AIC: 77.063
##
## Number of Fisher Scoring iterations: 8
log_pred3 <- predict(log_mod2, newdata = data, type = "response")
## Warning in log(x1): NaNs produced
preds3 <- ifelse(log_pred3 > 0.5, 1, 0)
class_colors <- c("blue", "red")
plot(data$x1, data$x2, col = class_colors[preds3 + 1], pch = 16,
xlab = "X1", ylab = "X2",
main = "Observations Colored by Predicted Class Labels")
svmod <- svm(y ~ ., data = data, kernel = "linear", cost = 10)
# Plot SVM decision boundary using training data
plot(svmod, data = data, color.palette = rainbow)
modpred <- predict(svmod, data)
table(predict = modpred, truth = data$y)
## truth
## predict 0 1
## 0 261 239
## 1 0 0
rad_svm <- svm(y ~ ., data = data, kernel = "radial", cost = 10)
# Plot SVM decision boundary using training data
plot(rad_svm, data = data, color.palette = terrain.colors)
radpred <- predict(rad_svm, data)
table(predict = radpred, truth = data$y)
## truth
## predict 0 1
## 0 256 3
## 1 5 236
It looks like a non-linear kernel improves the model's performance, specifically for class 1, which the linear kernel missed entirely. The decision boundary of the radial fit is clearly non-linear, tracking the true quadratic boundary x1^2 - x2^2 = 0 used to generate the labels.
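To see that boundary explicitly, one option (a sketch; the grid resolution is arbitrary) is to predict over a fine grid of (x1, x2) values and color the regions:
# Predict the radial SVM over a grid to trace its decision regions
grid <- expand.grid(x1 = seq(-0.5, 0.5, length.out = 200),
                    x2 = seq(-0.5, 0.5, length.out = 200))
grid_pred <- predict(rad_svm, grid)
plot(grid$x1, grid$x2, col = c("lightblue", "pink")[as.numeric(grid_pred)],
     pch = 20, cex = 0.3, xlab = "X1", ylab = "X2",
     main = "Radial SVM Decision Regions")
points(data$x1, data$x2, col = as.numeric(data$y), pch = 16)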
# Set seed for reproducibility
set.seed(1)
# Generate random data with 200 samples
x <- matrix(rnorm(200 * 2), ncol = 2)
# Create class labels
y <- c(rep(-1, 100), rep(1, 100)) # 100 samples for each class
# Shift samples of class 1 by 3.8 units along both dimensions
x[y == 1, ] <- x[y == 1, ] + 3.8
dat <- data.frame(x1 = x[, 1], x2 = x[, 2], y = factor(y))
# Plot the data
plot(x, col = ifelse(y == -1, 'blue', 'red'), pch = 19, xlab = 'Feature 1', ylab = 'Feature 2', main = 'Mock Data Class Distribution')
legend('topright', legend = c('Class -1', 'Class 1'), col = c('blue', 'red'), pch = 19)
grid()
set.seed(1)
tune.out <- tune(svm, y ~ ., data = dat, kernel = "linear",
                 ranges = list(cost = c(0.1, 1, 5, 10)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.1
##
## - best performance: 0
##
## - Detailed performance results:
## cost error dispersion
## 1 0.1 0.000 0.00000000
## 2 1.0 0.000 0.00000000
## 3 5.0 0.015 0.02415229
## 4 10.0 0.015 0.02415229
bestmod <- tune.out$best.model
summary(bestmod)
##
## Call:
## best.tune(METHOD = svm, train.x = y ~ ., data = dat, ranges = list(cost = c(0.1,
## 1, 5, 10)), kernel = "linear")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.1
##
## Number of Support Vectors: 25
##
## ( 13 12 )
##
##
## Number of Classes: 2
##
## Levels:
## -1 1
set.seed(1)
# using the book's approach
xtest <- matrix(rnorm(200 * 2), ncol = 2)
ytest <- sample(c(-1, 1), 200, replace = TRUE)
xtest[ytest == 1, ] <- xtest[ytest == 1, ] + 1
testdat <- data.frame(x1 = xtest[, 1], x2 = xtest[, 2], y = as.factor(ytest))
ypred <- predict(bestmod, testdat)
table(predict = ypred, truth = testdat$y)
## truth
## predict -1 1
## -1 95 93
## 1 0 12
Note that the test classes here were shifted by only 1 unit (versus 3.8 in training), so the two classes overlap heavily and the model labels nearly everything -1.
svmfit1 <- svm(y ~ ., data = dat, kernel = "linear", cost = 1, scale = FALSE)
ypred1 <- predict(svmfit1, testdat)
table(predict = ypred1, truth = testdat$y)
## truth
## predict -1 1
## -1 95 97
## 1 0 8
svmfit5 <- svm(y ~ ., data = dat, kernel = "linear", cost = 5, scale = FALSE)
ypred5 <- predict(svmfit5, testdat)
table(predict = ypred5, truth = testdat$y)
## truth
## predict -1 1
## -1 95 93
## 1 0 12
svmfit10 <- svm(y ~ ., data = dat, kernel = "linear", cost = 10, scale = FALSE)
ypred10 <- predict(svmfit10, testdat)
table(predict = ypred10, truth = testdat$y)
## truth
## predict -1 1
## -1 95 93
## 1 0 12
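The three fits above differ only in cost, so the repetition could be collapsed into one pass; a sketch assuming the same dat and testdat objects:
# Test error for each cost value in one pass
sapply(c(1, 5, 10), function(co) {
  fit <- svm(y ~ ., data = dat, kernel = "linear", cost = co, scale = FALSE)
  mean(predict(fit, testdat) != testdat$y)
})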
library(ISLR2)
median_mpg <- median(Auto$mpg)
Auto$high_mpg <- ifelse(Auto$mpg > median_mpg, 1, 0)
Auto$high_mpg <- as.factor(Auto$high_mpg)
# Drop mpg (column 1) and name (column 9) so they can't leak into the model
auto <- Auto[, -c(1, 9)]
tune.auto <- tune(svm, high_mpg ~ ., data = auto, kernel = "linear",
                  ranges = list(cost = c(0.1, 1, 5, 10)))
summary(tune.auto)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 5
##
## - best performance: 0.08916667
##
## - Detailed performance results:
## cost error dispersion
## 1 0.1 0.09198718 0.05031609
## 2 1.0 0.09429487 0.04949748
## 3 5.0 0.08916667 0.04173887
## 4 10.0 0.08916667 0.04173887
# Refit the linear SVM at the CV-selected cost of 5
best.svm <- svm(high_mpg ~ ., data = auto, kernel = "linear", cost = 5)
plot(best.svm, auto, horsepower ~ weight)
plot(best.svm, auto, weight ~ displacement)
plot(best.svm, auto, year ~ cylinders)
plot(best.svm, auto, acceleration ~ horsepower)
plot(best.svm, auto, origin ~ cylinders)
tune.poly <- tune(svm, high_mpg ~ ., data = auto,
                  kernel = "polynomial",
                  ranges = list(
                    cost = c(0.1, 1, 10, 100, 1000),
                    gamma = c(0.5, 1, 2, 3, 4)))
summary(tune.poly)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 0.1 1
##
## - best performance: 0.07666667
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 1e-01 0.5 0.08185897 0.03993618
## 2 1e+00 0.5 0.08685897 0.04046001
## 3 1e+01 0.5 0.10230769 0.06620981
## 4 1e+02 0.5 0.12262821 0.04823199
## 5 1e+03 0.5 0.13282051 0.05128009
## 6 1e-01 1.0 0.07666667 0.04196333
## 7 1e+00 1.0 0.10230769 0.06509713
## 8 1e+01 1.0 0.11493590 0.04574052
## 9 1e+02 1.0 0.13282051 0.05921372
## 10 1e+03 1.0 0.13019231 0.05731473
## 11 1e-01 2.0 0.10230769 0.06509713
## 12 1e+00 2.0 0.12006410 0.05122152
## 13 1e+01 2.0 0.14564103 0.05840631
## 14 1e+02 2.0 0.13019231 0.05731473
## 15 1e+03 2.0 0.13019231 0.05731473
## 16 1e-01 3.0 0.10730769 0.05376741
## 17 1e+00 3.0 0.13025641 0.06004600
## 18 1e+01 3.0 0.13019231 0.05731473
## 19 1e+02 3.0 0.13019231 0.05731473
## 20 1e+03 3.0 0.13019231 0.05731473
## 21 1e-01 4.0 0.12012821 0.05284236
## 22 1e+00 4.0 0.13544872 0.06205715
## 23 1e+01 4.0 0.13019231 0.05731473
## 24 1e+02 4.0 0.13019231 0.05731473
## 25 1e+03 4.0 0.13019231 0.05731473
best_poly <- svm(high_mpg ~ ., data = auto, kernel = "polynomial", cost = 0.1, gamma = 1)
plot(best_poly, auto, horsepower ~ weight)
plot(best_poly, auto, weight ~ displacement)
plot(best_poly, auto, year ~ cylinders)
plot(best_poly, auto, acceleration ~ horsepower)
plot(best_poly, auto, origin ~ cylinders)
tune.rad <- tune(svm, high_mpg ~ ., data = auto,
                 kernel = "radial",
                 ranges = list(
                   cost = c(0.1, 1, 10, 100, 1000),
                   gamma = c(0.5, 1, 2, 3, 4)))
summary(tune.rad)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 1 1
##
## - best performance: 0.05891026
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 1e-01 0.5 0.08705128 0.06201207
## 2 1e+00 0.5 0.07423077 0.06109228
## 3 1e+01 0.5 0.07673077 0.06520564
## 4 1e+02 0.5 0.09198718 0.03691730
## 5 1e+03 0.5 0.10217949 0.02998648
## 6 1e-01 1.0 0.08705128 0.06082264
## 7 1e+00 1.0 0.05891026 0.05139984
## 8 1e+01 1.0 0.06903846 0.05002561
## 9 1e+02 1.0 0.08173077 0.04337686
## 10 1e+03 1.0 0.08173077 0.04337686
## 11 1e-01 2.0 0.10237179 0.06856414
## 12 1e+00 2.0 0.06403846 0.05833016
## 13 1e+01 2.0 0.08435897 0.03662670
## 14 1e+02 2.0 0.09198718 0.03884572
## 15 1e+03 2.0 0.09198718 0.03884572
## 16 1e-01 3.0 0.29916667 0.09413360
## 17 1e+00 3.0 0.08192308 0.06270325
## 18 1e+01 3.0 0.08698718 0.04244565
## 19 1e+02 3.0 0.09205128 0.04241353
## 20 1e+03 3.0 0.09205128 0.04241353
## 21 1e-01 4.0 0.53878205 0.06250520
## 22 1e+00 4.0 0.08192308 0.06032820
## 23 1e+01 4.0 0.08955128 0.04422824
## 24 1e+02 4.0 0.09205128 0.04241353
## 25 1e+03 4.0 0.09205128 0.04241353
best_rad <- svm(high_mpg ~ ., data = auto, kernel = "radial", cost = 1, gamma = 1)
plot(best_rad, auto, horsepower ~ weight)
plot(best_rad, auto, weight ~ displacement)
plot(best_rad, auto, year ~ cylinders)
plot(best_rad, auto, acceleration ~ horsepower)
plot(best_rad, auto, origin ~ cylinders)
The output suggests the radial kernel gives the lowest cross-validated error (about 0.059), with the polynomial kernel (about 0.077) close behind.
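The best cross-validated errors can also be pulled straight from the three tune objects for a side-by-side look:
# Best 10-fold CV error per kernel, from the tuning runs above
c(linear = tune.auto$best.performance,
  polynomial = tune.poly$best.performance,
  radial = tune.rad$best.performance)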
oj_data <- OJ
# Random training sample of 800 of the 1,070 observations
train <- sample(1070, 800)
test_oj <- oj_data[-train, ]
svm_oj <- svm(Purchase ~ ., data = oj_data[train, ], kernel = "linear", cost = 0.1)
summary(svm_oj)
##
## Call:
## svm(formula = Purchase ~ ., data = oj_data[train, ], kernal = "linear",
## cost = 0.1)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 0.1
##
## Number of Support Vectors: 545
##
## ( 273 272 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
The output indicates that this model used a total of 545 support vectors, with a cost value of 0.1: 273 within class CH and 272 within class MM.
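The support-vector count reported by summary() can be confirmed programmatically; fitted svm objects store the indices of their support vectors in $index:
# Number of support vectors in the fitted model
length(svm_oj$index)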
train_pred <- predict(svm_oj, oj_data[train, ])
table(predict = train_pred, truth = oj_data[train, ]$Purchase)
## truth
## predict CH MM
## CH 428 76
## MM 59 237
(76 + 59)/(428 + 76 + 59 + 237)
## [1] 0.16875
The training error is about 16.9%.
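Hand-copying cell counts from the table invites arithmetic slips; the same rate can be computed straight from the predictions:
# Training error computed directly, no transcription needed
mean(train_pred != oj_data[train, ]$Purchase)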
test_pred <- predict(svm_oj, test_oj)
table(predict = test_pred, truth = test_oj$Purchase)
## truth
## predict CH MM
## CH 147 27
## MM 19 77
(27 + 19)/(147 + 27 + 19 + 77)
## [1] 0.1703704
The test error is about 17%.
oj_tune <- tune(svm, Purchase ~ ., data = oj_data[train, ], kernel = "linear",
                ranges = list(cost = c(0.1, 1, 5, 10)))
summary(oj_tune)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.18125
##
## - Detailed performance results:
## cost error dispersion
## 1 0.1 0.18875 0.04803428
## 2 1.0 0.18125 0.04686342
## 3 5.0 0.18875 0.04185375
## 4 10.0 0.19250 0.03184162
# Refit at the CV-selected cost of 1
oj_best <- svm(Purchase ~ ., data = oj_data[train, ], kernel = "linear", cost = 1)
cost_pred <- predict(oj_best, oj_data[train, ])
table(predict = cost_pred, truth = oj_data[train, ]$Purchase)
## truth
## predict CH MM
## CH 428 76
## MM 59 237
(76 + 59)/(428 + 76 + 59 + 237)
## [1] 0.16875
The training error is about 16.9%, essentially unchanged from the original fit.
test_best <- predict(oj_best, test_oj)
table(predict = test_best, truth = test_oj$Purchase)
## truth
## predict CH MM
## CH 147 27
## MM 19 77
(27 + 19)/(147 + 27 + 19 + 77)
## [1] 0.1703704
The test error rate stays the same, at about 17%.
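Since the same train/test evaluation repeats for each kernel below, a small helper keeps it consistent (err_rate is a name introduced here for illustration):
# Hypothetical helper: misclassification rate of a fitted model on a data set
err_rate <- function(model, df) mean(predict(model, df) != df$Purchase)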
oj_rad <- tune(svm, Purchase ~ ., data = oj_data[train, ], kernel = "radial",
               ranges = list(cost = c(0.1, 1, 5, 10)))
summary(oj_rad)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.1725
##
## - Detailed performance results:
## cost error dispersion
## 1 0.1 0.18625 0.02791978
## 2 1.0 0.17250 0.03322900
## 3 5.0 0.18125 0.03019037
## 4 10.0 0.18625 0.03087272
rad_best <- svm(Purchase ~ ., data = oj_data[train, ], kernel = "radial", cost = 1)
rad_pred <- predict(rad_best, oj_data[train, ])
table(predict = rad_pred, truth = oj_data[train, ]$Purchase)
## truth
## predict CH MM
## CH 443 71
## MM 44 242
(71 + 44)/(443 + 71 + 44 + 242)
## [1] 0.14375
The training error under this model is about 14.4%.
rad_test_pred <- predict(rad_best, test_oj)
table(predict = rad_test_pred, truth = test_oj$Purchase)
## truth
## predict CH MM
## CH 148 27
## MM 18 77
(27 + 18)/(148 + 27 + 18 + 77)
## [1] 0.1666667
The test error for this model is about 16.7%.
oj_poly <- tune(svm, Purchase ~ ., data = oj_data[train, ], kernel = "polynomial",
                degree = 2, ranges = list(cost = c(0.1, 1, 5, 10)))
summary(oj_poly)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.165
##
## - Detailed performance results:
## cost error dispersion
## 1 0.1 0.18375 0.02503470
## 2 1.0 0.16500 0.03425801
## 3 5.0 0.18500 0.04116363
## 4 10.0 0.19000 0.04031129
# Refit at the CV-selected cost of 1
poly_best <- svm(Purchase ~ ., data = oj_data[train, ], kernel = "polynomial", cost = 1, degree = 2)
poly_pred <- predict(poly_best, oj_data[train, ])
table(predict = poly_pred, truth = oj_data[train, ]$Purchase)
## truth
## predict CH MM
## CH 428 76
## MM 59 237
(76 + 59)/(428 + 76 + 59 + 237)
## [1] 0.16875
The training error for this model is about 16.9%.
poly_test_pred <- predict(poly_best, test_oj)
table(predict = poly_test_pred, truth = test_oj$Purchase)
## truth
## predict CH MM
## CH 147 27
## MM 19 77
(27 + 19)/(147 + 27 + 19 + 77)
## [1] 0.1703704
The test error is about 17% for this model. Considering them all, the radial SVM with cost = 1 gives both the lowest training error (about 14.4%) and the lowest test error (about 16.7%).
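Assuming the three refit models are still in the workspace, the comparison can be tabulated with the err_rate helper sketched earlier:
# Train/test error for each tuned kernel, side by side
data.frame(kernel = c("linear", "radial", "polynomial"),
           train = c(err_rate(oj_best, oj_data[train, ]),
                     err_rate(rad_best, oj_data[train, ]),
                     err_rate(poly_best, oj_data[train, ])),
           test = c(err_rate(oj_best, test_oj),
                    err_rate(rad_best, test_oj),
                    err_rate(poly_best, test_oj)))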