1. In a past homework, you performed ridge regression on the wine quality data set. Now use a support vector machine to classify these data. 1a) First classify the data treating the last column as an ordered factor (the wine tasters score). Next treat the last column as a numeric. Which SVM implementation is better? Why do you think it is better? 1b) Using the best version choose two attributes and a slice through the data to plot. Choose a different set of attributes and another set of slices to plot. 1c) Compare and contrast the best version of the SVM with the ridge regression model
library(randomForest)
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
library(e1071)

setwd("C:/Users/Manjari/Desktop/Machine learning/Home Work Solutions")

winequality <- read.csv("winequality-red.csv", header = TRUE,sep = ";")
model1 <- svm(factor(quality, ordered = TRUE) ~ ., data = winequality, gamma = 1, cost = 4, cross = 10)
print(model1)
## 
## Call:
## svm(formula = factor(quality, ordered = TRUE) ~ ., data = winequality, 
##     gamma = 1, cost = 4, cross = 10)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  4 
##       gamma:  1 
## 
## Number of Support Vectors:  1365
summary(model1)
## 
## Call:
## svm(formula = factor(quality, ordered = TRUE) ~ ., data = winequality, 
##     gamma = 1, cost = 4, cross = 10)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  4 
##       gamma:  1 
## 
## Number of Support Vectors:  1365
## 
##  ( 570 543 172 53 17 10 )
## 
## 
## Number of Classes:  6 
## 
## Levels: 
##  3 4 5 6 7 8
## 
## 10-fold cross-validation on training data:
## 
## Total Accuracy: 66.79174 
## Single Accuracies:
##  62.89308 66.875 70 65.625 66.875 68.125 71.25 64.375 70 61.875
# Cross-validated misclassification rate of the classification SVM (model1).
# tot.accuracy is the 10-fold cross-validation accuracy in percent, so this
# converts it to an error fraction.
svm1Err <- (100 - model1$tot.accuracy) / 100
svm1Err
## [1] 0.3320826
plot(model1, winequality, fixed.acidity ~ alcohol)

plot(model1, winequality, fixed.acidity ~ volatile.acidity)

plot(model1, winequality, free.sulfur.dioxide ~ total.sulfur.dioxide)

x1 <- subset(winequality, select = -quality)
y1 <- winequality$quality

pred1 <- predict(model1, x1)
table(pred1, y1)
##      y1
## pred1   3   4   5   6   7   8
##     3  10   0   0   0   0   0
##     4   0  52   0   0   0   0
##     5   0   1 676   4   0   0
##     6   0   0   5 633   1   0
##     7   0   0   0   1 198   1
##     8   0   0   0   0   0  17
obj <- tune(svm, quality ~ ., data = winequality, ranges = list(gamma = 2^(-4:0), cost = 2^(2:4)), tunecontrol = tune.control(sampling = "cross"))

summary(obj)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##   gamma cost
##  0.0625    4
## 
## - best performance: 0.3780183 
## 
## - Detailed performance results:
##     gamma cost     error dispersion
## 1  0.0625    4 0.3780183 0.05638519
## 2  0.1250    4 0.3847228 0.06737206
## 3  0.2500    4 0.3957970 0.07105080
## 4  0.5000    4 0.3943695 0.08031540
## 5  1.0000    4 0.3945748 0.06662639
## 6  0.0625    8 0.3802658 0.06855827
## 7  0.1250    8 0.4005225 0.07415958
## 8  0.2500    8 0.4210195 0.08246570
## 9  0.5000    8 0.4073099 0.07762405
## 10 1.0000    8 0.3993351 0.06689450
## 11 0.0625   16 0.3934438 0.08258105
## 12 0.1250   16 0.4286154 0.08013054
## 13 0.2500   16 0.4554170 0.08695392
## 14 0.5000   16 0.4254551 0.07582938
## 15 1.0000   16 0.4002628 0.06667417
str(winequality)
## 'data.frame':    1599 obs. of  12 variables:
##  $ fixed.acidity       : num  7.4 7.8 7.8 11.2 7.4 7.4 7.9 7.3 7.8 7.5 ...
##  $ volatile.acidity    : num  0.7 0.88 0.76 0.28 0.7 0.66 0.6 0.65 0.58 0.5 ...
##  $ citric.acid         : num  0 0 0.04 0.56 0 0 0.06 0 0.02 0.36 ...
##  $ residual.sugar      : num  1.9 2.6 2.3 1.9 1.9 1.8 1.6 1.2 2 6.1 ...
##  $ chlorides           : num  0.076 0.098 0.092 0.075 0.076 0.075 0.069 0.065 0.073 0.071 ...
##  $ free.sulfur.dioxide : num  11 25 15 17 11 13 15 15 9 17 ...
##  $ total.sulfur.dioxide: num  34 67 54 60 34 40 59 21 18 102 ...
##  $ density             : num  0.998 0.997 0.997 0.998 0.998 ...
##  $ pH                  : num  3.51 3.2 3.26 3.16 3.51 3.51 3.3 3.39 3.36 3.35 ...
##  $ sulphates           : num  0.56 0.68 0.65 0.58 0.56 0.56 0.46 0.47 0.57 0.8 ...
##  $ alcohol             : num  9.4 9.8 9.8 9.8 9.4 9.4 9.4 10 9.5 10.5 ...
##  $ quality             : int  5 5 5 6 5 5 5 7 7 5 ...
names(winequality)
##  [1] "fixed.acidity"        "volatile.acidity"     "citric.acid"         
##  [4] "residual.sugar"       "chlorides"            "free.sulfur.dioxide" 
##  [7] "total.sulfur.dioxide" "density"              "pH"                  
## [10] "sulphates"            "alcohol"              "quality"
model2 <- svm(quality ~ ., data = winequality, gamma = 0.0909, cost = 1, cross = 10)
print(model2)
## 
## Call:
## svm(formula = quality ~ ., data = winequality, gamma = 0.0909, 
##     cost = 1, cross = 10)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  radial 
##        cost:  1 
##       gamma:  0.0909 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  1328
summary(model2)
## 
## Call:
## svm(formula = quality ~ ., data = winequality, gamma = 0.0909, 
##     cost = 1, cross = 10)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  radial 
##        cost:  1 
##       gamma:  0.0909 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  1328
## 
## 
## 
## 10-fold cross-validation on training data:
## 
## Total Mean Squared Error: 0.3923175 
## Squared Correlation Coefficient: 0.4003188 
## Mean Squared Errors:
##  0.4934818 0.3799656 0.3101665 0.4353687 0.2939571 0.345708 0.3990553 0.3718168 0.4352961 0.4589919
# model2 is an eps-regression SVM, so it has no `tot.accuracy` component and
# the previous expression, (100 - model2$tot.accuracy)/100, evaluated to
# numeric(0). For regression the cross-validation summary is the total mean
# squared error (`tot.MSE`), the figure summary(model2) prints above.
# This is an MSE on the quality scale, not a misclassification rate, so it is
# not directly comparable with svm1Err.
svm2Err <- as.numeric(model2$tot.MSE)
svm2Err
## [1] 0.3923175
plot(model2, winequality, density ~ alcohol)
plot(model2, winequality, residual.sugar ~ pH)

x2 <- subset(winequality, select = -quality)
y2 <- winequality$quality
# test with train data
pred2 <- round(as.numeric(predict(model2, x2)), 0)
# class(pred) Check accuracy:
table(pred2, y2)
##      y2
## pred2   3   4   5   6   7   8
##     4   1   0   0   0   0   0
##     5   9  44 550 165   6   0
##     6   0   9 129 453 109   8
##     7   0   0   2  20  84  10
# Count errors directly from the predictions instead of hand-copying cells out
# of the confusion table (pred2 and y2 are both numeric here).
# These are training-set (resubstitution) figures: pred2 was predicted on the
# same rows the model was fitted to.
# s: predictions whose rounded value differs from the true score.
s <- sum(pred2 != y2)
s
## [1] 512
perSmallError2 <- round(s / nrow(winequality), 2)
perSmallError2
## [1] 0.32
# l: "large" errors, i.e. predictions more than one quality level away from
# the true score. The earlier hand-summed version also added the 20 wines
# predicted 7 with true score 6, which are only one level off, giving 54.
l <- sum(abs(pred2 - y2) > 1)
l
## [1] 34
table(pred1, y1)
##      y1
## pred1   3   4   5   6   7   8
##     3  10   0   0   0   0   0
##     4   0  52   0   0   0   0
##     5   0   1 676   4   0   0
##     6   0   0   5 633   1   0
##     7   0   0   0   1 198   1
##     8   0   0   0   0   0  17

Treating quality as a factor or as a numeric (rounded) had little effect on the classifier, resulting in 33% and 32% error respectively. I used a second evaluation technique that accepted a one-level difference as a correct classification. Using this approach, the error for both categorical and numeric quality is about the same, 3%. 2) Classify the sonar data set. 2a) Use a support vector machine to classify the sonar data set. First tune an SVM employing radial basis function (default). Next tune an SVM employing a linear kernel. Compare the results. 2b) In past homework, trees were used to classify the sonar data. Compare the best result using trees with the best result using SVM.

library(e1071)
library(rpart)
library(MASS)
setwd("C:/Users/Manjari/Desktop/Machine learning/Home Work Solutions")
oldpar <- par(no.readonly = TRUE)  # record current setting
par(mar = rep(1, 4))
sonar <- read.csv("sonar_train.csv", header = FALSE)
sonar$V61 <- as.factor(sonar$V61)
model <- svm(V61 ~ ., data = sonar)
print(model)
## 
## Call:
## svm(formula = V61 ~ ., data = sonar)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
##       gamma:  0.01666667 
## 
## Number of Support Vectors:  97
plot(model, sonar, V60 ~ V59)

# Grid-search gamma and cost for the radial-kernel SVM on the sonar data.
# NOTE(review): the default fit above used gamma = 0.0167 (1/60), but this grid
# only covers gamma in {0.5, 1, 2}. The best cross-validation error reported
# below (0.56) is worse than guessing for a two-class problem, so the grid
# probably needs much smaller gamma values (e.g. 2^(-8:-3)) -- confirm by
# re-running.
obj <- tune(svm, V61 ~ ., data = sonar, ranges = list(gamma = 2^(-1:1), cost = 2^(2:4)),tunecontrol = tune.control(sampling = "cross"))
summary(obj)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  gamma cost
##    0.5    4
## 
## - best performance: 0.5615385 
## 
## - Detailed performance results:
##   gamma cost     error dispersion
## 1   0.5    4 0.5615385 0.13099528
## 2   1.0    4 0.6307692 0.07944581
## 3   2.0    4 0.6307692 0.07944581
## 4   0.5    8 0.5615385 0.13099528
## 5   1.0    8 0.6307692 0.07944581
## 6   2.0    8 0.6307692 0.07944581
## 7   0.5   16 0.5615385 0.13099528
## 8   1.0   16 0.6307692 0.07944581
## 9   2.0   16 0.6307692 0.07944581
plot(obj)

obj$best.parameters
##   gamma cost
## 1   0.5    4
model <- svm(V61 ~ ., data = sonar, gamma = 0.5, cost = 4)
print(model)
## 
## Call:
## svm(formula = V61 ~ ., data = sonar, gamma = 0.5, cost = 4)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  4 
##       gamma:  0.5 
## 
## Number of Support Vectors:  130
summary(model)
## 
## Call:
## svm(formula = V61 ~ ., data = sonar, gamma = 0.5, cost = 4)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  4 
##       gamma:  0.5 
## 
## Number of Support Vectors:  130
## 
##  ( 66 64 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  -1 1
model <- svm(V61 ~ ., data = sonar, kernel = "linear")
print(model)
## 
## Call:
## svm(formula = V61 ~ ., data = sonar, kernel = "linear")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  1 
##       gamma:  0.01666667 
## 
## Number of Support Vectors:  47
summary(model)
## 
## Call:
## svm(formula = V61 ~ ., data = sonar, kernel = "linear")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  1 
##       gamma:  0.01666667 
## 
## Number of Support Vectors:  47
## 
##  ( 21 26 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  -1 1
plot(model, sonar, V60 ~ V59)

plot(model, sonar, V2 ~ V1)

x <- subset(sonar, select = -V61)
y <- sonar$V61
pred <- predict(model, x)
table(pred, y)
##     y
## pred -1  1
##   -1 66  2
##   1   0 62
C <- 0.65

model <- svm(V61 ~ ., data = sonar, kernel = "linear", cost = C)
print(model)
## 
## Call:
## svm(formula = V61 ~ ., data = sonar, kernel = "linear", cost = C)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  0.65 
##       gamma:  0.01666667 
## 
## Number of Support Vectors:  51
summary(model)
## 
## Call:
## svm(formula = V61 ~ ., data = sonar, kernel = "linear", cost = C)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  0.65 
##       gamma:  0.01666667 
## 
## Number of Support Vectors:  51
## 
##  ( 24 27 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  -1 1
plot(model, sonar, V60 ~ V59)

plot(model, sonar, V2 ~ V1)

17 errors: the classification improved, but it still made errors. Next, compare with the sonar results obtained using trees of varying depth.

library(rpart)

setwd("C:/Users/Manjari/Desktop/Machine learning/Home Work Solutions")
train <- read.csv("sonar_train.csv", header = FALSE)
y <- as.factor(train[, 61])
x <- train[, 1:60]
test <- read.csv("sonar_test.csv", header = FALSE)
y_test <- as.factor(test[, 61])
x_test <- test[, 1:60]


# Fit classification trees capped at depths 1..6 and record the
# misclassification rate on the training and test sets for each depth.
max_depth <- 6
train_error <- numeric(max_depth)
test_error <- numeric(max_depth)

# Fraction of rows in `newdata` whose predicted class differs from `truth`.
error_rate <- function(truth, fit, newdata) {
  1 - sum(truth == predict(fit, newdata, type = "class")) / length(truth)
}

for (dep in seq_len(max_depth)) {
  # minsplit = 0, minbucket = 0 and cp = -1 effectively switch off rpart's
  # usual stopping rules, leaving maxdepth as the limit on tree size; xval = 0
  # skips rpart's internal cross-validation.
  grow_ctrl <- rpart.control(
    minsplit = 0, minbucket = 0, cp = -1,
    maxcompete = 0, maxsurrogate = 0, usesurrogate = 0,
    xval = 0, maxdepth = dep
  )
  fit <- rpart(y ~ ., x, control = grow_ctrl)

  train_error[dep] <- error_rate(y, fit, x)
  test_error[dep] <- error_rate(y_test, fit, x_test)
}
plot(seq(1, 6), test_error, type = "o", pch = 19, ylim = c(0, 0.5), ylab = "Error Rate",xlab = "Tree Depth", main = "Err Rate versus Tree Depth Plot")

points(train_error, type = "o", pch = 19, lwd = 4, col = "blue")


legend(4, 0.5, c("Test Error", "Training Error"), col = c("black", "blue"), pch = 19, lwd = c(1, 4))

train_error
## [1] 0.22307692 0.19230769 0.10769231 0.06153846 0.01538462 0.00000000
test_error
## [1] 0.2820513 0.2948718 0.3333333 0.2820513 0.2564103 0.2692308
min(train_error)
## [1] 0
min(test_error)
## [1] 0.2564103
  1. The in class example (svm1.r) used the glass data set. Use the Random Forest technique on the glass data. Compare the Random Forest results with the results obtained in class with SVM.
data(Glass, package = "mlbench")

setwd("C:/Users/Manjari/Desktop/Machine learning/Home Work Solutions")
str(Glass)
## 'data.frame':    214 obs. of  10 variables:
##  $ RI  : num  1.52 1.52 1.52 1.52 1.52 ...
##  $ Na  : num  13.6 13.9 13.5 13.2 13.3 ...
##  $ Mg  : num  4.49 3.6 3.55 3.69 3.62 3.61 3.6 3.61 3.58 3.6 ...
##  $ Al  : num  1.1 1.36 1.54 1.29 1.24 1.62 1.14 1.05 1.37 1.36 ...
##  $ Si  : num  71.8 72.7 73 72.6 73.1 ...
##  $ K   : num  0.06 0.48 0.39 0.57 0.55 0.64 0.58 0.57 0.56 0.57 ...
##  $ Ca  : num  8.75 7.83 7.78 8.22 8.07 8.07 8.17 8.24 8.3 8.4 ...
##  $ Ba  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Fe  : num  0 0 0 0 0 0.26 0 0 0 0.11 ...
##  $ Type: Factor w/ 6 levels "1","2","3","5",..: 1 1 1 1 1 1 1 1 1 1 ...
Glass$Type
##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [36] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [71] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [106] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [141] 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 5 5 5 5 5 5 5 5 5 5 5 5
## [176] 5 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
## [211] 7 7 7 7
## Levels: 1 2 3 5 6 7
# Hold out one third of the Glass rows as a test set; the remaining rows form
# the training set. seq_len() stays correct for a zero-row frame, unlike
# 1:nrow().
index <- seq_len(nrow(Glass))
set.seed(pi)  # fixed seed so the split is reproducible
testindex <- sample(index, trunc(length(index) / 3))
testset <- Glass[testindex, ]
trainset <- Glass[-testindex, ]


svm.model <- svm(Type ~ ., data = trainset, cost = 100, gamma = 1)
svm.pred <- predict(svm.model, testset[, -10])
table(pred = svm.pred, true = testset[, 10])
##     true
## pred  1  2  3  5  6  7
##    1 18  7  1  0  0  0
##    2  5 18  0  5  0  2
##    3  2  1  2  0  0  0
##    5  0  0  0  2  0  0
##    6  0  0  0  0  1  0
##    7  0  0  0  0  0  7
length(testset[, 10])
## [1] 71
svmErr <- 1 - sum(svm.pred == testset[, 10])/length(testset[, 10])
svmErr
## [1] 0.3239437
rpart.model <- rpart(Type ~ ., data = trainset)
rpart.pred <- predict(rpart.model, testset[, -10], type = "class")
table(pred = rpart.pred, true = testset[, 10])
##     true
## pred  1  2  3  5  6  7
##    1 20  3  0  0  0  0
##    2  4 19  2  1  1  0
##    3  1  3  1  0  0  0
##    5  0  1  0  6  0  0
##    6  0  0  0  0  0  0
##    7  0  0  0  0  0  9
1 - sum(rpart.pred == testset[, 10])/length(testset[, 10])
## [1] 0.2253521
obj <- tune(svm, Type ~ ., data = trainset, ranges = list(gamma = 2^(-4:0), cost = 2^(2:4)), tunecontrol = tune.control(sampling = "cross"))
summary(obj)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##   gamma cost
##  0.0625   16
## 
## - best performance: 0.3142857 
## 
## - Detailed performance results:
##     gamma cost     error dispersion
## 1  0.0625    4 0.3333333 0.10830935
## 2  0.1250    4 0.3342857 0.07165045
## 3  0.2500    4 0.3490476 0.06044473
## 4  0.5000    4 0.3485714 0.10953071
## 5  1.0000    4 0.3557143 0.12433851
## 6  0.0625    8 0.3209524 0.07148851
## 7  0.1250    8 0.3633333 0.06834803
## 8  0.2500    8 0.3566667 0.09543498
## 9  0.5000    8 0.3628571 0.11715533
## 10 1.0000    8 0.3557143 0.12433851
## 11 0.0625   16 0.3142857 0.08571429
## 12 0.1250   16 0.3357143 0.09718253
## 13 0.2500   16 0.3566667 0.09091927
## 14 0.5000   16 0.3490476 0.11584148
## 15 1.0000   16 0.3633333 0.11097611
plot(obj)

obj$best.parameters
##     gamma cost
## 11 0.0625   16
# Fit a random forest on the full Glass data and cross-tabulate the true type
# (rows) against the out-of-bag (OOB) predictions (columns).
# The previous calls passed `control = randomForest_unbiased(...)` and
# `OOB = TRUE`, a pattern borrowed from party::cforest(). randomForest has no
# such function or arguments, so the mtry/ntree settings apparently never took
# effect (the call only succeeded because the unused argument was never
# evaluated), and the forest was fitted twice. Tuning parameters are passed
# directly, e.g. randomForest(..., mtry = 2, ntree = 60); the defaults are kept
# here because they are what the reported results were obtained with.
glass.cf <- randomForest(Type ~ ., data = Glass)
# predict() without newdata returns the OOB prediction for every training row.
glass.pred <- predict(glass.cf)
# deparse.level = 0 keeps the table header unlabelled, as before.
table(Glass$Type, glass.pred, deparse.level = 0)
##    
##      1  2  3  5  6  7
##   1 62  6  2  0  0  0
##   2 10 60  1  3  2  0
##   3  7  4  6  0  0  0
##   5  0  2  0 10  0  1
##   6  1  1  0  0  7  0
##   7  1  3  0  0  0 25
rfErr <- 1 - sum(Glass$Type == glass.pred)/length(glass.pred)
rfErr
## [1] 0.2056075
  1. Choose a new data set which we haven’t used in class yet (suggestion: choose one from http://archive.ics.uci.edu/ml/.) Use SVM to classify the data set. Try different kernels. Does changing the kernel make a difference? Which kernel resulted in the smallest error? Use another technique to classify the data set. Which resulted in the better model? (Make sure you describe the data set)
data(iris)

setwd("C:/Users/Manjari/Desktop/Machine learning/Home Work Solutions")
obj <- tune(svm, Species~., data = iris,ranges = list(gamma = 2^(-1:1), cost = 2^(2:4)),tunecontrol = tune.control(sampling = "fix"))
summary(obj)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: fixed training/validation set 
## 
## - best parameters:
##  gamma cost
##    0.5    4
## 
## - best performance: 0.02 
## 
## - Detailed performance results:
##   gamma cost error dispersion
## 1   0.5    4  0.02         NA
## 2   1.0    4  0.04         NA
## 3   2.0    4  0.04         NA
## 4   0.5    8  0.04         NA
## 5   1.0    8  0.04         NA
## 6   2.0    8  0.04         NA
## 7   0.5   16  0.04         NA
## 8   1.0   16  0.04         NA
## 9   2.0   16  0.04         NA
plot(obj)

x <- iris[,-5]
y <- iris[,5]
obj2 <- tune.knn(x, y, k = 1:5, tunecontrol = tune.control(sampling = "boot"))
summary(obj2)
## 
## Parameter tuning of 'knn.wrapper':
## 
## - sampling method: bootstrapping 
## 
## - best parameters:
##  k
##  4
## 
## - best performance: 0.04726544 
## 
## - Detailed performance results:
##   k      error dispersion
## 1 1 0.04732794 0.02165832
## 2 2 0.05213906 0.02490950
## 3 3 0.04729779 0.02432917
## 4 4 0.04726544 0.03014710
## 5 5 0.04898860 0.02657732
data(mtcars)
obj3 <- tune.rpart(mpg~., data = mtcars, minsplit = c(5,10,15))
summary(obj3)
## 
## Parameter tuning of 'rpart.wrapper':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  minsplit
##         5
## 
## - best performance: 9.088153 
## 
## - Detailed performance results:
##   minsplit     error dispersion
## 1        5  9.088153   7.463697
## 2       10 11.649977  10.104875
## 3       15 17.655931  10.144265
plot(obj3)

tune(lm, mpg~., data = mtcars)
## 
## Error estimation of 'lm' using 10-fold cross validation: 13.11232
tune.control(random = FALSE, nrepeat = 1, repeat.aggregate = min,
sampling = c("cross", "fix", "bootstrap"), sampling.aggregate = mean,
sampling.dispersion = sd,cross = 10, fix = 2/3, nboot = 10, boot.size = 9/10, best.model = TRUE,performances = TRUE, error.fun = NULL)
## $random
## [1] FALSE
## 
## $nrepeat
## [1] 1
## 
## $repeat.aggregate
## function (..., na.rm = FALSE)  .Primitive("min")
## 
## $sampling
## [1] "cross"
## 
## $sampling.aggregate
## function (x, ...) 
## UseMethod("mean")
## <bytecode: 0x000000000769da20>
## <environment: namespace:base>
## 
## $sampling.dispersion
## function (x, na.rm = FALSE) 
## sqrt(var(if (is.vector(x)) x else as.double(x), na.rm = na.rm))
## <bytecode: 0x000000000769d188>
## <environment: namespace:stats>
## 
## $cross
## [1] 10
## 
## $fix
## [1] 0.6666667
## 
## $nboot
## [1] 10
## 
## $boot.size
## [1] 0.9
## 
## $best.model
## [1] TRUE
## 
## $performances
## [1] TRUE
## 
## $error.fun
## NULL
## 
## attr(,"class")
## [1] "tune.control"
  1. Use SVM with kernel = “linear” to create regression predictions on the data set created using these lines of code: x <- seq(0.1, 5, by = 0.05) # the observed feature y <- log(x) + rnorm(x, sd = 0.2) # the target for the observed feature Next try various kernels and added features with SVM. Can you improve the model by adding an extra feature which might be a function of the first feature? Compare both lm.ridge and svm. Which method produced a better model? (don’t forget to tune your models)
data(iris)
# NOTE(review): attach() is discouraged because it hides where names come
# from. It is kept only because later statements in this script may still
# refer to bare `Species`; the code in this block names the data frame
# explicitly instead.
attach(iris)

setwd("C:/Users/Manjari/Desktop/Machine learning/Home Work Solutions")
# Formula interface: classify Species from all other columns.
model <- svm(Species ~ ., data = iris)
# Matrix/vector interface: predictors in x, class labels in y. This fit
# replaces the one above and is the model printed below.
x <- subset(iris, select = -Species)
y <- iris$Species
model <- svm(x, y)
print(model)
## 
## Call:
## svm.default(x = x, y = y)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
##       gamma:  0.25 
## 
## Number of Support Vectors:  51
summary(model)
## 
## Call:
## svm.default(x = x, y = y)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
##       gamma:  0.25 
## 
## Number of Support Vectors:  51
## 
##  ( 8 22 21 )
## 
## 
## Number of Classes:  3 
## 
## Levels: 
##  setosa versicolor virginica
pred <- predict(model, x)
pred <- fitted(model)
table(pred, y)
##             y
## pred         setosa versicolor virginica
##   setosa         50          0         0
##   versicolor      0         48         2
##   virginica       0          2        48
pred <- predict(model, x, decision.values = TRUE)
attr(pred, "decision.values")[1:4,]
##   setosa/versicolor setosa/virginica versicolor/virginica
## 1          1.196152         1.091757            0.6708810
## 2          1.064621         1.056185            0.8483518
## 3          1.180842         1.074542            0.6439798
## 4          1.110699         1.053012            0.6782041
plot(cmdscale(dist(iris[,-5])),col = as.integer(iris[,5]),pch = c("o","+")[1:150 %in% model$index + 1])

x <- seq(0.1, 5, by = 0.05)
y <- log(x) + rnorm(x, sd = 0.2)
m <- svm(x, y)
new <- predict(m, x)
plot(x, y)
points(x, log(x), col = 2)
points(x, new, col = 4)

# Fit an SVM with no response variable on two independent standard-normal
# features (presumably e1071's one-class / novelty-detection mode, given the
# TRUE/FALSE predictions below).
X <- data.frame(a = rnorm(1000), b = rnorm(1000))
# Pass the data frame explicitly rather than attach()-ing it, so `a` and `b`
# are not resolved through the search path. The earlier svm(X, gamma = 0.1)
# fit was overwritten immediately and has been dropped.
m <- svm(~ a + b, data = X, gamma = 0.1)
newdata <- data.frame(a = c(0, 4), b = c(0, 4))
predict (m, newdata)
##     1     2 
##  TRUE FALSE
plot(X, col = 1:1000 %in% m$index + 1, xlim = c(-5,5), ylim=c(-5,5))
points(newdata, pch = "+", col = 2, cex = 5)

i2 <- iris
levels(i2$Species)[3] <- "versicolor"
summary(i2$Species)
##     setosa versicolor 
##         50        100
wts <- 100 / table(i2$Species)
wts
## 
##     setosa versicolor 
##          2          1
m <- svm(Species ~ ., data = i2, class.weights = wts)
model <- svm(Species ~ ., data = iris, gamma = 0.5, cost = 4)
print(model)
## 
## Call:
## svm(formula = Species ~ ., data = iris, gamma = 0.5, cost = 4)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  4 
##       gamma:  0.5 
## 
## Number of Support Vectors:  49
summary(model)
## 
## Call:
## svm(formula = Species ~ ., data = iris, gamma = 0.5, cost = 4)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  4 
##       gamma:  0.5 
## 
## Number of Support Vectors:  49
## 
##  ( 11 18 20 )
## 
## 
## Number of Classes:  3 
## 
## Levels: 
##  setosa versicolor virginica
# Confusion matrix on the training data for the gamma = 0.5, cost = 4 model
# fitted above.
x <- subset(iris, select = -Species)
y <- iris$Species  # explicit column reference; does not rely on attach(iris)
pred <- predict(model, x)
table(pred, y)
##             y
## pred         setosa versicolor virginica
##   setosa         50          0         0
##   versicolor      0         48         1
##   virginica       0          2        49
obj <- tune(svm, Species~., data = iris,ranges = list(gamma = seq(.5,1.5,0.1), cost = seq(7,9,0.5)),tunecontrol = tune.control(sampling = "cross"))
summary(obj)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  gamma cost
##    0.5    7
## 
## - best performance: 0.06 
## 
## - Detailed performance results:
##    gamma cost      error dispersion
## 1    0.5  7.0 0.06000000 0.07981460
## 2    0.6  7.0 0.06000000 0.07981460
## 3    0.7  7.0 0.06000000 0.07981460
## 4    0.8  7.0 0.06000000 0.07981460
## 5    0.9  7.0 0.06000000 0.07981460
## 6    1.0  7.0 0.06000000 0.07981460
## 7    1.1  7.0 0.06000000 0.07981460
## 8    1.2  7.0 0.06000000 0.07981460
## 9    1.3  7.0 0.06666667 0.07698004
## 10   1.4  7.0 0.06000000 0.07981460
## 11   1.5  7.0 0.06000000 0.07981460
## 12   0.5  7.5 0.06000000 0.07981460
## 13   0.6  7.5 0.06000000 0.07981460
## 14   0.7  7.5 0.06000000 0.07981460
## 15   0.8  7.5 0.06000000 0.07981460
## 16   0.9  7.5 0.06000000 0.07981460
## 17   1.0  7.5 0.06000000 0.07981460
## 18   1.1  7.5 0.06666667 0.07698004
## 19   1.2  7.5 0.06666667 0.07698004
## 20   1.3  7.5 0.06666667 0.07698004
## 21   1.4  7.5 0.06000000 0.07981460
## 22   1.5  7.5 0.06000000 0.07981460
## 23   0.5  8.0 0.06000000 0.07981460
## 24   0.6  8.0 0.06000000 0.07981460
## 25   0.7  8.0 0.06000000 0.07981460
## 26   0.8  8.0 0.06000000 0.07981460
## 27   0.9  8.0 0.06000000 0.07981460
## 28   1.0  8.0 0.06000000 0.07981460
## 29   1.1  8.0 0.06666667 0.07698004
## 30   1.2  8.0 0.06666667 0.07698004
## 31   1.3  8.0 0.06666667 0.07698004
## 32   1.4  8.0 0.06000000 0.07981460
## 33   1.5  8.0 0.06000000 0.07981460
## 34   0.5  8.5 0.06000000 0.07981460
## 35   0.6  8.5 0.06000000 0.07981460
## 36   0.7  8.5 0.06000000 0.07981460
## 37   0.8  8.5 0.06000000 0.07981460
## 38   0.9  8.5 0.06000000 0.07981460
## 39   1.0  8.5 0.06666667 0.07698004
## 40   1.1  8.5 0.06666667 0.07698004
## 41   1.2  8.5 0.06666667 0.07698004
## 42   1.3  8.5 0.06666667 0.07698004
## 43   1.4  8.5 0.06000000 0.07981460
## 44   1.5  8.5 0.06000000 0.07981460
## 45   0.5  9.0 0.06000000 0.07981460
## 46   0.6  9.0 0.06000000 0.07981460
## 47   0.7  9.0 0.06000000 0.07981460
## 48   0.8  9.0 0.06000000 0.07981460
## 49   0.9  9.0 0.06000000 0.07981460
## 50   1.0  9.0 0.06000000 0.07981460
## 51   1.1  9.0 0.06666667 0.07698004
## 52   1.2  9.0 0.06666667 0.07698004
## 53   1.3  9.0 0.06666667 0.07698004
## 54   1.4  9.0 0.06000000 0.07981460
## 55   1.5  9.0 0.06000000 0.07981460
plot(obj)

obj$best.parameters
##   gamma cost
## 1   0.5    7
model <- svm(Species ~ ., data = iris, gamma = 1.0, cost = 8)
print(model)
## 
## Call:
## svm(formula = Species ~ ., data = iris, gamma = 1, cost = 8)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  8 
##       gamma:  1 
## 
## Number of Support Vectors:  55
summary(model)
## 
## Call:
## svm(formula = Species ~ ., data = iris, gamma = 1, cost = 8)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  8 
##       gamma:  1 
## 
## Number of Support Vectors:  55
## 
##  ( 13 20 22 )
## 
## 
## Number of Classes:  3 
## 
## Levels: 
##  setosa versicolor virginica
obj$best.parameters
##   gamma cost
## 1   0.5    7
model$index
##  [1]  14  15  16  19  21  23  26  32  33  37  42  44  45  51  54  58  60
## [18]  61  63  64  65  68  69  71  73  77  78  79  80  84  85  86  99 101
## [35] 107 109 110 111 115 118 119 120 124 126 128 130 132 134 135 136 139
## [52] 142 147 149 150
pred <- predict(model, x)
table(pred, y)
##             y
## pred         setosa versicolor virginica
##   setosa         50          0         0
##   versicolor      0         49         0
##   virginica       0          1        50
# Accuracy on the training data, computed from the predictions rather than
# typed in by hand.
mean(pred == iris$Species)  # SVM correctly classifies 99.3% of Iris Data
## [1] 0.9933333
# Row numbers of the misclassified observations.
which(pred != iris[, 5])  # 84
## [1] 84
model <- svm(Species ~ ., data = iris, gamma = 2.0, cost = 16)
pred <- predict(model, x)
which(pred != iris[, 5])
## integer(0)
table(pred, y)
##             y
## pred         setosa versicolor virginica
##   setosa         50          0         0
##   versicolor      0         50         0
##   virginica       0          0        50
# NOTE(review): these predictions are made on the same rows the model was
# fitted to, so zero errors here measures training fit only; the
# cross-validated tuning above is the better guide to generalisation.
model <- svm(Species ~ ., data = iris, gamma = 3.0, cost = 4)
pred <- predict(model, x)
which(pred != iris[, 5])  # this also gave NO errors
## integer(0)
table(pred, y)
##             y
## pred         setosa versicolor virginica
##   setosa         50          0         0
##   versicolor      0         50         0
##   virginica       0          0        50
data(Glass, package = "mlbench")
str(Glass)
## 'data.frame':    214 obs. of  10 variables:
##  $ RI  : num  1.52 1.52 1.52 1.52 1.52 ...
##  $ Na  : num  13.6 13.9 13.5 13.2 13.3 ...
##  $ Mg  : num  4.49 3.6 3.55 3.69 3.62 3.61 3.6 3.61 3.58 3.6 ...
##  $ Al  : num  1.1 1.36 1.54 1.29 1.24 1.62 1.14 1.05 1.37 1.36 ...
##  $ Si  : num  71.8 72.7 73 72.6 73.1 ...
##  $ K   : num  0.06 0.48 0.39 0.57 0.55 0.64 0.58 0.57 0.56 0.57 ...
##  $ Ca  : num  8.75 7.83 7.78 8.22 8.07 8.07 8.17 8.24 8.3 8.4 ...
##  $ Ba  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Fe  : num  0 0 0 0 0 0.26 0 0 0 0.11 ...
##  $ Type: Factor w/ 6 levels "1","2","3","5",..: 1 1 1 1 1 1 1 1 1 1 ...
Glass$Type
##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [36] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [71] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [106] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [141] 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 5 5 5 5 5 5 5 5 5 5 5 5
## [176] 5 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
## [211] 7 7 7 7
## Levels: 1 2 3 5 6 7
# Hold out one third of the Glass rows as a test set; the remaining rows form
# the training set. seq_len() stays correct for a zero-row frame, unlike
# 1:nrow().
index <- seq_len(nrow(Glass))
set.seed(pi)  # fixed seed so the split is reproducible
testindex <- sample(index, trunc(length(index) / 3))

testset <- Glass[testindex, ]
trainset <- Glass[-testindex, ]
svm.model <- svm(Type ~ ., data = trainset, cost = 100, gamma = 1)
svm.pred <- predict(svm.model, testset[, -10])
table(pred = svm.pred, true = testset[, 10])
##     true
## pred  1  2  3  5  6  7
##    1 18  7  1  0  0  0
##    2  5 18  0  5  0  2
##    3  2  1  2  0  0  0
##    5  0  0  0  2  0  0
##    6  0  0  0  0  1  0
##    7  0  0  0  0  0  7
length(testset[,10])
## [1] 71
1-sum(svm.pred == testset[,10])/length(testset[,10])
## [1] 0.3239437
rpart.model <- rpart(Type ~ ., data = trainset)
rpart.pred <- predict(rpart.model, testset[, -10], type = "class")
table(pred = rpart.pred, true = testset[, 10])
##     true
## pred  1  2  3  5  6  7
##    1 20  3  0  0  0  0
##    2  4 19  2  1  1  0
##    3  1  3  1  0  0  0
##    5  0  1  0  6  0  0
##    6  0  0  0  0  0  0
##    7  0  0  0  0  0  9
1-sum(rpart.pred == testset[,10])/length(testset[,10])
## [1] 0.2253521
obj <- tune(svm, Type~., data = trainset,ranges = list(gamma = 2^(-4:0), cost = 2^(2:4)),tunecontrol = tune.control(sampling = "cross"))
summary(obj)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##   gamma cost
##  0.0625   16
## 
## - best performance: 0.3142857 
## 
## - Detailed performance results:
##     gamma cost     error dispersion
## 1  0.0625    4 0.3333333 0.10830935
## 2  0.1250    4 0.3342857 0.07165045
## 3  0.2500    4 0.3490476 0.06044473
## 4  0.5000    4 0.3485714 0.10953071
## 5  1.0000    4 0.3557143 0.12433851
## 6  0.0625    8 0.3209524 0.07148851
## 7  0.1250    8 0.3633333 0.06834803
## 8  0.2500    8 0.3566667 0.09543498
## 9  0.5000    8 0.3628571 0.11715533
## 10 1.0000    8 0.3557143 0.12433851
## 11 0.0625   16 0.3142857 0.08571429
## 12 0.1250   16 0.3357143 0.09718253
## 13 0.2500   16 0.3566667 0.09091927
## 14 0.5000   16 0.3490476 0.11584148
## 15 1.0000   16 0.3633333 0.11097611
plot(obj)

obj$best.parameters
##     gamma cost
## 11 0.0625   16
svm.model <- svm(Type ~ ., data = trainset, cost = 8, gamma = 0.25)
svm.pred <- predict(svm.model, testset[, -10])
table(pred = svm.pred, true = testset[, 10])
##     true
## pred  1  2  3  5  6  7
##    1 22  9  1  0  0  0
##    2  2 15  2  3  0  1
##    3  1  1  0  0  0  0
##    5  0  0  0  4  0  0
##    6  0  1  0  0  1  0
##    7  0  0  0  0  0  8
length(testset[,10])
## [1] 71
1-sum(svm.pred == testset[,10])/length(testset[,10])
## [1] 0.2957746
(6+3+1+2+4+4+1)/71 # = 0.2957746
## [1] 0.2957746
obj <- tune(svm, Type~., data = trainset,ranges = list(gamma = seq(0.1,0.15,0.01), cost = seq(15,17,1)),tunecontrol = tune.control(sampling = "cross"))
summary(obj)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  gamma cost
##   0.13   16
## 
## - best performance: 0.2728571 
## 
## - Detailed performance results:
##    gamma cost     error dispersion
## 1   0.10   15 0.2800000  0.1174775
## 2   0.11   15 0.2938095  0.1240342
## 3   0.12   15 0.2866667  0.1265827
## 4   0.13   15 0.2733333  0.1189566
## 5   0.14   15 0.2800000  0.1125485
## 6   0.15   15 0.3014286  0.1280521
## 7   0.10   16 0.2800000  0.1174775
## 8   0.11   16 0.2938095  0.1240342
## 9   0.12   16 0.2866667  0.1265827
## 10  0.13   16 0.2728571  0.1143970
## 11  0.14   16 0.2800000  0.1125485
## 12  0.15   16 0.3014286  0.1280521
## 13  0.10   17 0.2938095  0.1240342
## 14  0.11   17 0.2938095  0.1240342
## 15  0.12   17 0.2800000  0.1256036
## 16  0.13   17 0.2728571  0.1143970
## 17  0.14   17 0.2800000  0.1125485
## 18  0.15   17 0.3014286  0.1280521
plot(obj)

obj$best.parameters
##    gamma cost
## 10  0.13   16
svm.model <- svm(Type ~ ., data = trainset, cost = 16, gamma = 0.13)
svm.pred <- predict(svm.model, testset[, -10])
table(pred = svm.pred, true = testset[, 10])
##     true
## pred  1  2  3  5  6  7
##    1 22  9  1  0  0  0
##    2  2 15  1  3  0  0
##    3  1  1  1  0  0  0
##    5  0  0  0  4  0  0
##    6  0  1  0  0  1  1
##    7  0  0  0  0  0  8
length(testset[,10])
## [1] 71
1-sum(svm.pred == testset[,10])/length(testset[,10])
## [1] 0.2816901
(9+1+2+1+3+1+1+1+1)/length(testset[,10]) #[1] 0.2816901
## [1] 0.2816901
obj <- tune(svm, Type~., data = trainset,ranges = list(gamma = seq(0.1,0.15,0.01), cost = seq(15,17,1)),tunecontrol = tune.control(sampling = "cross"))
summary(obj)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  gamma cost
##   0.13   17
## 
## - best performance: 0.2657143 
## 
## - Detailed performance results:
##    gamma cost     error dispersion
## 1   0.10   15 0.2938095  0.1466009
## 2   0.11   15 0.3076190  0.1325411
## 3   0.12   15 0.3076190  0.1325411
## 4   0.13   15 0.2866667  0.1331972
## 5   0.14   15 0.2800000  0.1435398
## 6   0.15   15 0.2942857  0.1265827
## 7   0.10   16 0.2866667  0.1409359
## 8   0.11   16 0.3076190  0.1325411
## 9   0.12   16 0.3076190  0.1325411
## 10  0.13   16 0.2800000  0.1395346
## 11  0.14   16 0.2800000  0.1435398
## 12  0.15   16 0.2942857  0.1265827
## 13  0.10   17 0.2938095  0.1302178
## 14  0.11   17 0.3076190  0.1325411
## 15  0.12   17 0.3076190  0.1325411
## 16  0.13   17 0.2657143  0.1608243
## 17  0.14   17 0.2800000  0.1435398
## 18  0.15   17 0.3009524  0.1186724
plot(obj)

obj$best.parameters
##    gamma cost
## 16  0.13   17
# Refit the radial SVM on the training split and tabulate test-set predictions
# against the true glass type.
# NOTE(review): the tuning run immediately above reports best parameters
# gamma = 0.13, cost = 17, but this refit still uses cost = 16 (the value from
# the previous tuning run). Consider using obj$best.parameters or
# obj$best.model -- confirm which was intended.
svm.model <- svm(Type ~ ., data = trainset, cost = 16, gamma = 0.13)
svm.pred <- predict(svm.model, testset[, -10])
table(pred = svm.pred, true = testset[, 10])
##     true
## pred  1  2  3  5  6  7
##    1 22  9  1  0  0  0
##    2  2 15  1  3  0  0
##    3  1  1  1  0  0  0
##    5  0  0  0  4  0  0
##    6  0  1  0  0  1  1
##    7  0  0  0  0  0  8
length(testset[,10])
## [1] 71
1-sum(svm.pred == testset[,10])/length(testset[,10])
## [1] 0.2816901
(9+1+2+1+3+1+1+1+1)/length(testset[,10]) #[1] 0.2816901
## [1] 0.2816901
x <- seq(0.1, 5, by = 0.05)
y <- log(x) + rnorm(x, sd = 0.2)


m <- svm(x, y)
new <- predict(m, x)
plot(x, y, col = 1)
points(x, log(x), col = 2)
points(x, new, col = 4)
legend(3, -1, c("actual y", "log(x)", "predicted"), col = c(1,2,4), pch=1)