Homework#6

In a past homework, you performed ridge regression on the wine quality data set. Now use a support vector machine to classify these data. 1a) First classify the data treating the last column as an ordered factor (the wine tasters score). Next treat the last column as a numeric. Which SVM implementation is better? Why do you think it is better? 1b) Using the best version choose two attributes and a slice through the data to plot. Choose a different set of attributes and another set of slices to plot. 1c) Compare and contrast the best version of the SVM with the ridge regression model

library(randomForest)

## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.

library(e1071)

setwd("C:/Users/Manjari/Desktop/Machine learning/Home Work Solutions")

# Load the red-wine data; the file is semicolon-delimited with a header row.
winequality <- read.csv(
  file = "winequality-red.csv",
  header = TRUE,
  sep = ";"
)

# Part 1a, classification variant: quality is converted to an ordered factor
# inside the formula, so svm() fits a classifier. cross = 10 additionally
# requests a 10-fold cross-validation estimate on the training data.
model1 <- svm(
  factor(quality, ordered = TRUE) ~ .,
  data = winequality,
  gamma = 1,
  cost = 4,
  cross = 10
)
print(model1)

## 
## Call:
## svm(formula = factor(quality, ordered = TRUE) ~ ., data = winequality, 
##     gamma = 1, cost = 4, cross = 10)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  4 
##       gamma:  1 
## 
## Number of Support Vectors:  1365

summary(model1)

## 
## Call:
## svm(formula = factor(quality, ordered = TRUE) ~ ., data = winequality, 
##     gamma = 1, cost = 4, cross = 10)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  4 
##       gamma:  1 
## 
## Number of Support Vectors:  1365
## 
##  ( 570 543 172 53 17 10 )
## 
## 
## Number of Classes:  6 
## 
## Levels: 
##  3 4 5 6 7 8
## 
## 10-fold cross-validation on training data:
## 
## Total Accuracy: 66.79174 
## Single Accuracies:
##  62.89308 66.875 70 65.625 66.875 68.125 71.25 64.375 70 61.875

# Turn the cross-validated accuracy (reported in percent) into an error rate
# on the 0-1 scale.
svm1Err <- (100 - model1$tot.accuracy) / 100
svm1Err

## [1] 0.3320826

plot(model1, winequality, fixed.acidity ~ alcohol)

plot(model1, winequality, fixed.acidity ~ volatile.acidity)

plot(model1, winequality, free.sulfur.dioxide ~ total.sulfur.dioxide)

# Separate the response from the predictors, then score the very rows the
# model was trained on: the table is a resubstitution confusion matrix, not
# a hold-out estimate.
y1 <- winequality[["quality"]]
x1 <- winequality[, setdiff(names(winequality), "quality")]

pred1 <- predict(model1, newdata = x1)
table(pred1, y1)

##      y1
## pred1   3   4   5   6   7   8
##     3  10   0   0   0   0   0
##     4   0  52   0   0   0   0
##     5   0   1 676   4   0   0
##     6   0   0   5 633   1   0
##     7   0   0   0   1 198   1
##     8   0   0   0   0   0  17

# Grid-search gamma and cost with cross-validation. quality is left as an
# integer column here, so this tunes the regression form of the SVM; the
# "error" column in the summary is presumably a mean squared error rather
# than a misclassification rate -- confirm against the e1071 tune() docs.
# NOTE(review): model2 further down is fitted with gamma = 0.0909, cost = 1,
# not with the best pair reported by this search.
obj <- tune(svm, quality ~ ., data = winequality, ranges = list(gamma = 2^(-4:0), cost = 2^(2:4)), tunecontrol = tune.control(sampling = "cross"))

summary(obj)

## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##   gamma cost
##  0.0625    4
## 
## - best performance: 0.3780183 
## 
## - Detailed performance results:
##     gamma cost     error dispersion
## 1  0.0625    4 0.3780183 0.05638519
## 2  0.1250    4 0.3847228 0.06737206
## 3  0.2500    4 0.3957970 0.07105080
## 4  0.5000    4 0.3943695 0.08031540
## 5  1.0000    4 0.3945748 0.06662639
## 6  0.0625    8 0.3802658 0.06855827
## 7  0.1250    8 0.4005225 0.07415958
## 8  0.2500    8 0.4210195 0.08246570
## 9  0.5000    8 0.4073099 0.07762405
## 10 1.0000    8 0.3993351 0.06689450
## 11 0.0625   16 0.3934438 0.08258105
## 12 0.1250   16 0.4286154 0.08013054
## 13 0.2500   16 0.4554170 0.08695392
## 14 0.5000   16 0.4254551 0.07582938
## 15 1.0000   16 0.4002628 0.06667417

str(winequality)

## 'data.frame':    1599 obs. of  12 variables:
##  $ fixed.acidity       : num  7.4 7.8 7.8 11.2 7.4 7.4 7.9 7.3 7.8 7.5 ...
##  $ volatile.acidity    : num  0.7 0.88 0.76 0.28 0.7 0.66 0.6 0.65 0.58 0.5 ...
##  $ citric.acid         : num  0 0 0.04 0.56 0 0 0.06 0 0.02 0.36 ...
##  $ residual.sugar      : num  1.9 2.6 2.3 1.9 1.9 1.8 1.6 1.2 2 6.1 ...
##  $ chlorides           : num  0.076 0.098 0.092 0.075 0.076 0.075 0.069 0.065 0.073 0.071 ...
##  $ free.sulfur.dioxide : num  11 25 15 17 11 13 15 15 9 17 ...
##  $ total.sulfur.dioxide: num  34 67 54 60 34 40 59 21 18 102 ...
##  $ density             : num  0.998 0.997 0.997 0.998 0.998 ...
##  $ pH                  : num  3.51 3.2 3.26 3.16 3.51 3.51 3.3 3.39 3.36 3.35 ...
##  $ sulphates           : num  0.56 0.68 0.65 0.58 0.56 0.56 0.46 0.47 0.57 0.8 ...
##  $ alcohol             : num  9.4 9.8 9.8 9.8 9.4 9.4 9.4 10 9.5 10.5 ...
##  $ quality             : int  5 5 5 6 5 5 5 7 7 5 ...

names(winequality)

##  [1] "fixed.acidity"        "volatile.acidity"     "citric.acid"         
##  [4] "residual.sugar"       "chlorides"            "free.sulfur.dioxide" 
##  [7] "total.sulfur.dioxide" "density"              "pH"                  
## [10] "sulphates"            "alcohol"              "quality"

model2 <- svm(quality ~ ., data = winequality, gamma = 0.0909, cost = 1, cross = 10)
print(model2)

## 
## Call:
## svm(formula = quality ~ ., data = winequality, gamma = 0.0909, 
##     cost = 1, cross = 10)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  radial 
##        cost:  1 
##       gamma:  0.0909 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  1328

summary(model2)

## 
## Call:
## svm(formula = quality ~ ., data = winequality, gamma = 0.0909, 
##     cost = 1, cross = 10)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  radial 
##        cost:  1 
##       gamma:  0.0909 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  1328
## 
## 
## 
## 10-fold cross-validation on training data:
## 
## Total Mean Squared Error: 0.3923175 
## Squared Correlation Coefficient: 0.4003188 
## Mean Squared Errors:
##  0.4934818 0.3799656 0.3101665 0.4353687 0.2939571 0.345708 0.3990553 0.3718168 0.4352961 0.4589919

# model2 is an eps-regression fit, so its cross-validation results expose
# tot.MSE (total mean squared error) instead of tot.accuracy; reading
# tot.accuracy here produced numeric(0). This value is a squared error on the
# quality scale, not a misclassification rate, so it is not directly
# comparable with svm1Err.
svm2Err <- model2$tot.MSE
svm2Err

## [1] 0.3923175

plot(model2, winequality, density ~ alcohol)
plot(model2, winequality, residual.sugar ~ pH)

# Score the regression SVM on its own training rows, then round each
# continuous prediction to a whole quality score so it can be cross-tabulated
# against the true scores.
y2 <- winequality[["quality"]]
x2 <- winequality[, setdiff(names(winequality), "quality")]
raw_pred2 <- as.numeric(predict(model2, x2))
pred2 <- round(raw_pred2, digits = 0)
table(pred2, y2)

##      y2
## pred2   3   4   5   6   7   8
##     4   1   0   0   0   0   0
##     5   9  44 550 165   6   0
##     6   0   9 129 453 109   8
##     7   0   0   2  20  84  10

# Total number of misclassified wines, taken straight from the predictions
# instead of re-typing the off-diagonal cells of the table by hand.
s <- sum(pred2 != y2)
s

## [1] 512

# Share of wines whose rounded prediction differs from the true score.
perSmallError2 <- round(s / nrow(winequality), 2)
perSmallError2

## [1] 0.32

# "Large" errors: predictions more than one quality level away from the true
# score. The hand-typed sum previously used here also counted the 20 wines
# predicted 7 with true score 6 (a one-level miss) and left out nothing else,
# which inflated the count from 34 to 54.
l <- sum(abs(pred2 - y2) > 1)
l

## [1] 34

table(pred1, y1)

##      y1
## pred1   3   4   5   6   7   8
##     3  10   0   0   0   0   0
##     4   0  52   0   0   0   0
##     5   0   1 676   4   0   0
##     6   0   0   5 633   1   0
##     7   0   0   0   1 198   1
##     8   0   0   0   0   0  17

Treating quality as a factor or as a rounded numeric had little effect on the classifier, resulting in 33% and 32% error respectively. I used a second evaluation technique that accepted a one-level difference as a correct classification. Using this approach, the error for both categorical and numeric quality is about the same, roughly 3%. 2) Classify the sonar data set. 2a) Use a support vector machine to classify the sonar data set. First tune an SVM employing the radial basis function kernel (the default). Next tune an SVM employing a linear kernel. Compare the results. 2b) In past homework, trees were used to classify the sonar data. Compare the best result using trees with the best result using SVM.

library(e1071)
library(rpart)
library(MASS)
setwd("C:/Users/Manjari/Desktop/Machine learning/Home Work Solutions")
oldpar <- par(no.readonly = TRUE)  # record current setting
par(mar = rep(1, 4))
# Read the headerless sonar training file.
sonar <- read.csv(file = "sonar_train.csv", header = FALSE)

# V61 is the class label; make it a factor so svm() fits a classifier.
sonar[["V61"]] <- as.factor(sonar[["V61"]])

# Baseline fit with every svm() setting left at its default.
model <- svm(V61 ~ ., data = sonar)
print(model)

## 
## Call:
## svm(formula = V61 ~ ., data = sonar)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
##       gamma:  0.01666667 
## 
## Number of Support Vectors:  97

plot(model, sonar, V60 ~ V59)

# Grid-search gamma and cost for the radial-kernel sonar classifier using
# tune()'s cross-validation sampling.
# NOTE(review): the grid starts at gamma = 0.5, far above the 0.01666667 used
# by the untuned fit printed above, and every candidate in the recorded
# results scores an error of 0.56 or worse. Consider extending the grid
# toward smaller gamma values before trusting the reported "best" pair.
obj <- tune(svm, V61 ~ ., data = sonar, ranges = list(gamma = 2^(-1:1), cost = 2^(2:4)),tunecontrol = tune.control(sampling = "cross"))
summary(obj)

## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  gamma cost
##    0.5    4
## 
## - best performance: 0.5615385 
## 
## - Detailed performance results:
##   gamma cost     error dispersion
## 1   0.5    4 0.5615385 0.13099528
## 2   1.0    4 0.6307692 0.07944581
## 3   2.0    4 0.6307692 0.07944581
## 4   0.5    8 0.5615385 0.13099528
## 5   1.0    8 0.6307692 0.07944581
## 6   2.0    8 0.6307692 0.07944581
## 7   0.5   16 0.5615385 0.13099528
## 8   1.0   16 0.6307692 0.07944581
## 9   2.0   16 0.6307692 0.07944581

plot(obj)

obj$best.parameters

##   gamma cost
## 1   0.5    4

model <- svm(V61 ~ ., data = sonar, gamma = 0.5, cost = 4)
print(model)

## 
## Call:
## svm(formula = V61 ~ ., data = sonar, gamma = 0.5, cost = 4)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  4 
##       gamma:  0.5 
## 
## Number of Support Vectors:  130

summary(model)

## 
## Call:
## svm(formula = V61 ~ ., data = sonar, gamma = 0.5, cost = 4)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  4 
##       gamma:  0.5 
## 
## Number of Support Vectors:  130
## 
##  ( 66 64 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  -1 1

model <- svm(V61 ~ ., data = sonar, kernel = "linear")
print(model)

## 
## Call:
## svm(formula = V61 ~ ., data = sonar, kernel = "linear")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  1 
##       gamma:  0.01666667 
## 
## Number of Support Vectors:  47

summary(model)

## 
## Call:
## svm(formula = V61 ~ ., data = sonar, kernel = "linear")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  1 
##       gamma:  0.01666667 
## 
## Number of Support Vectors:  47
## 
##  ( 21 26 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  -1 1

plot(model, sonar, V60 ~ V59)

plot(model, sonar, V2 ~ V1)

# Resubstitution check: predict the training rows with the current model and
# cross-tabulate predicted against true labels.
y <- sonar[["V61"]]
x <- sonar[, names(sonar) != "V61"]
pred <- predict(model, newdata = x)
table(pred, y)

##     y
## pred -1  1
##   -1 66  2
##   1   0 62

C <- 0.65

model <- svm(V61 ~ ., data = sonar, kernel = "linear", cost = C)
print(model)

## 
## Call:
## svm(formula = V61 ~ ., data = sonar, kernel = "linear", cost = C)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  0.65 
##       gamma:  0.01666667 
## 
## Number of Support Vectors:  51

summary(model)

## 
## Call:
## svm(formula = V61 ~ ., data = sonar, kernel = "linear", cost = C)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  0.65 
##       gamma:  0.01666667 
## 
## Number of Support Vectors:  51
## 
##  ( 24 27 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  -1 1

plot(model, sonar, V60 ~ V59)

plot(model, sonar, V2 ~ V1)

17 errors: the classification improved, but it still made errors. Next, compare these sonar results with trees of varying depth.

library(rpart)

setwd("C:/Users/Manjari/Desktop/Machine learning/Home Work Solutions")
# Training split: the first 60 columns are the predictors, column 61 the label.
train <- read.csv(file = "sonar_train.csv", header = FALSE)
x <- train[, seq_len(60)]
y <- as.factor(train[[61]])

# Test split, laid out the same way.
test <- read.csv(file = "sonar_test.csv", header = FALSE)
x_test <- test[, seq_len(60)]
y_test <- as.factor(test[[61]])


# Grow one classification tree per depth limit (1 through 6) and record its
# misclassification rate on the training rows and on the test rows.
depths <- 1:6
train_error <- numeric(length(depths))
test_error <- numeric(length(depths))

for (dep in depths) {
  # The usual stopping rules are relaxed (cp = -1, minsplit = minbucket = 0)
  # and rpart's internal cross-validation is off (xval = 0); maxdepth is the
  # setting that changes from one iteration to the next.
  tree_ctrl <- rpart.control(
    minsplit = 0, minbucket = 0, cp = -1,
    maxcompete = 0, maxsurrogate = 0, usesurrogate = 0,
    xval = 0, maxdepth = dep
  )
  fit <- rpart(y ~ ., x, control = tree_ctrl)

  train_hat <- predict(fit, x, type = "class")
  test_hat <- predict(fit, x_test, type = "class")

  train_error[dep] <- 1 - sum(y == train_hat) / length(y)
  test_error[dep] <- 1 - sum(y_test == test_hat) / length(y_test)
}
# Test error against tree depth; drawn first so it establishes the axes.
plot(
  x = seq(1, 6), y = test_error,
  type = "o", pch = 19,
  ylim = c(0, 0.5),
  xlab = "Tree Depth", ylab = "Error Rate",
  main = "Err Rate versus Tree Depth Plot"
)

# Training error overlaid as a heavier blue line.
points(train_error, type = "o", pch = 19, lwd = 4, col = "blue")

legend(
  x = 4, y = 0.5,
  legend = c("Test Error", "Training Error"),
  col = c("black", "blue"),
  pch = 19,
  lwd = c(1, 4)
)

train_error

## [1] 0.22307692 0.19230769 0.10769231 0.06153846 0.01538462 0.00000000

test_error

## [1] 0.2820513 0.2948718 0.3333333 0.2820513 0.2564103 0.2692308

min(train_error)

## [1] 0

min(test_error)

## [1] 0.2564103

The in class example (svm1.r) used the glass data set. Use the Random Forest technique on the glass data. Compare the Random Forest results with the results obtained in class with SVM.

data(Glass, package = "mlbench")

setwd("C:/Users/Manjari/Desktop/Machine learning/Home Work Solutions")
str(Glass)

## 'data.frame':    214 obs. of  10 variables:
##  $ RI  : num  1.52 1.52 1.52 1.52 1.52 ...
##  $ Na  : num  13.6 13.9 13.5 13.2 13.3 ...
##  $ Mg  : num  4.49 3.6 3.55 3.69 3.62 3.61 3.6 3.61 3.58 3.6 ...
##  $ Al  : num  1.1 1.36 1.54 1.29 1.24 1.62 1.14 1.05 1.37 1.36 ...
##  $ Si  : num  71.8 72.7 73 72.6 73.1 ...
##  $ K   : num  0.06 0.48 0.39 0.57 0.55 0.64 0.58 0.57 0.56 0.57 ...
##  $ Ca  : num  8.75 7.83 7.78 8.22 8.07 8.07 8.17 8.24 8.3 8.4 ...
##  $ Ba  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Fe  : num  0 0 0 0 0 0.26 0 0 0 0.11 ...
##  $ Type: Factor w/ 6 levels "1","2","3","5",..: 1 1 1 1 1 1 1 1 1 1 ...

Glass$Type

##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [36] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [71] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [106] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [141] 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 5 5 5 5 5 5 5 5 5 5 5 5
## [176] 5 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
## [211] 7 7 7 7
## Levels: 1 2 3 5 6 7

# Seeded hold-out split: one third of the rows (rounded down) form the test
# set, the remaining rows the training set.
set.seed(pi)
index <- seq_len(nrow(Glass))
n_test <- trunc(length(index) / 3)
testindex <- sample(index, size = n_test)
is_test <- index %in% testindex
testset <- Glass[testindex, ]
trainset <- Glass[!is_test, ]

# Fit on the training rows, predict the held-out rows without the Type
# column, and cross-tabulate predicted against true glass type.
svm.model <- svm(Type ~ ., data = trainset, cost = 100, gamma = 1)
test_features <- testset[, names(testset) != "Type"]
svm.pred <- predict(svm.model, test_features)
table(pred = svm.pred, true = testset[["Type"]])

##     true
## pred  1  2  3  5  6  7
##    1 18  7  1  0  0  0
##    2  5 18  0  5  0  2
##    3  2  1  2  0  0  0
##    5  0  0  0  2  0  0
##    6  0  0  0  0  1  0
##    7  0  0  0  0  0  7

length(testset[, 10])

## [1] 71

svmErr <- 1 - sum(svm.pred == testset[, 10])/length(testset[, 10])
svmErr

## [1] 0.3239437

rpart.model <- rpart(Type ~ ., data = trainset)
rpart.pred <- predict(rpart.model, testset[, -10], type = "class")
table(pred = rpart.pred, true = testset[, 10])

##     true
## pred  1  2  3  5  6  7
##    1 20  3  0  0  0  0
##    2  4 19  2  1  1  0
##    3  1  3  1  0  0  0
##    5  0  1  0  6  0  0
##    6  0  0  0  0  0  0
##    7  0  0  0  0  0  9

1 - sum(rpart.pred == testset[, 10])/length(testset[, 10])

## [1] 0.2253521

obj <- tune(svm, Type ~ ., data = trainset, ranges = list(gamma = 2^(-4:0), cost = 2^(2:4)), tunecontrol = tune.control(sampling = "cross"))
summary(obj)

## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##   gamma cost
##  0.0625   16
## 
## - best performance: 0.3142857 
## 
## - Detailed performance results:
##     gamma cost     error dispersion
## 1  0.0625    4 0.3333333 0.10830935
## 2  0.1250    4 0.3342857 0.07165045
## 3  0.2500    4 0.3490476 0.06044473
## 4  0.5000    4 0.3485714 0.10953071
## 5  1.0000    4 0.3557143 0.12433851
## 6  0.0625    8 0.3209524 0.07148851
## 7  0.1250    8 0.3633333 0.06834803
## 8  0.2500    8 0.3566667 0.09543498
## 9  0.5000    8 0.3628571 0.11715533
## 10 1.0000    8 0.3557143 0.12433851
## 11 0.0625   16 0.3142857 0.08571429
## 12 0.1250   16 0.3357143 0.09718253
## 13 0.2500   16 0.3566667 0.09091927
## 14 0.5000   16 0.3490476 0.11584148
## 15 1.0000   16 0.3633333 0.11097611

plot(obj)

obj$best.parameters

##     gamma cost
## 11 0.0625   16

# Fit the forest once and reuse it for prediction. randomForest() takes mtry
# and ntree as direct arguments; randomForest_unbiased() is not a function of
# the randomForest package and `control` is not one of its arguments (the
# idiom comes from party::cforest), so the intended mtry/ntree values were
# not being passed to the fit.
glass.cf <- randomForest(Type ~ ., data = Glass, mtry = 2, ntree = 60)

# predict() on a randomForest object without newdata returns the out-of-bag
# predictions, so no extra OOB flag is needed. Counts vary from run to run
# because no seed is set before the fit.
glass.pred <- predict(glass.cf)
table(true = Glass$Type, pred = glass.pred)

##    
##      1  2  3  5  6  7
##   1 62  6  2  0  0  0
##   2 10 60  1  3  2  0
##   3  7  4  6  0  0  0
##   5  0  2  0 10  0  1
##   6  1  1  0  0  7  0
##   7  1  3  0  0  0 25

rfErr <- 1 - sum(Glass$Type == glass.pred)/length(glass.pred)
rfErr

## [1] 0.2056075

Choose a new data set which we haven’t used in class yet (suggestion: choose one from http://archive.ics.uci.edu/ml/.) Use SVM to classify the data set. Try different kernels. Does changing the kernel make a difference? Which kernel resulted in the smallest error? Use another technique to classify the data set. Which resulted in the better model? (Make sure you describe the data set)

data(iris)

setwd("C:/Users/Manjari/Desktop/Machine learning/Home Work Solutions")
obj <- tune(svm, Species~., data = iris,ranges = list(gamma = 2^(-1:1), cost = 2^(2:4)),tunecontrol = tune.control(sampling = "fix"))
summary(obj)

## 
## Parameter tuning of 'svm':
## 
## - sampling method: fixed training/validation set 
## 
## - best parameters:
##  gamma cost
##    0.5    4
## 
## - best performance: 0.02 
## 
## - Detailed performance results:
##   gamma cost error dispersion
## 1   0.5    4  0.02         NA
## 2   1.0    4  0.04         NA
## 3   2.0    4  0.04         NA
## 4   0.5    8  0.04         NA
## 5   1.0    8  0.04         NA
## 6   2.0    8  0.04         NA
## 7   0.5   16  0.04         NA
## 8   1.0   16  0.04         NA
## 9   2.0   16  0.04         NA

plot(obj)

x <- iris[,-5]
y <- iris[,5]
obj2 <- tune.knn(x, y, k = 1:5, tunecontrol = tune.control(sampling = "boot"))
summary(obj2)

## 
## Parameter tuning of 'knn.wrapper':
## 
## - sampling method: bootstrapping 
## 
## - best parameters:
##  k
##  4
## 
## - best performance: 0.04726544 
## 
## - Detailed performance results:
##   k      error dispersion
## 1 1 0.04732794 0.02165832
## 2 2 0.05213906 0.02490950
## 3 3 0.04729779 0.02432917
## 4 4 0.04726544 0.03014710
## 5 5 0.04898860 0.02657732

data(mtcars)
obj3 <- tune.rpart(mpg~., data = mtcars, minsplit = c(5,10,15))
summary(obj3)

## 
## Parameter tuning of 'rpart.wrapper':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  minsplit
##         5
## 
## - best performance: 9.088153 
## 
## - Detailed performance results:
##   minsplit     error dispersion
## 1        5  9.088153   7.463697
## 2       10 11.649977  10.104875
## 3       15 17.655931  10.144265

plot(obj3)

tune(lm, mpg~., data = mtcars)

## 
## Error estimation of 'lm' using 10-fold cross validation: 13.11232

tune.control(random = FALSE, nrepeat = 1, repeat.aggregate = min,
sampling = c("cross", "fix", "bootstrap"), sampling.aggregate = mean,
sampling.dispersion = sd,cross = 10, fix = 2/3, nboot = 10, boot.size = 9/10, best.model = TRUE,performances = TRUE, error.fun = NULL)

## $random
## [1] FALSE
## 
## $nrepeat
## [1] 1
## 
## $repeat.aggregate
## function (..., na.rm = FALSE)  .Primitive("min")
## 
## $sampling
## [1] "cross"
## 
## $sampling.aggregate
## function (x, ...) 
## UseMethod("mean")
## <bytecode: 0x000000000769da20>
## <environment: namespace:base>
## 
## $sampling.dispersion
## function (x, na.rm = FALSE) 
## sqrt(var(if (is.vector(x)) x else as.double(x), na.rm = na.rm))
## <bytecode: 0x000000000769d188>
## <environment: namespace:stats>
## 
## $cross
## [1] 10
## 
## $fix
## [1] 0.6666667
## 
## $nboot
## [1] 10
## 
## $boot.size
## [1] 0.9
## 
## $best.model
## [1] TRUE
## 
## $performances
## [1] TRUE
## 
## $error.fun
## NULL
## 
## attr(,"class")
## [1] "tune.control"

Use SVM with kernel = “linear” to create regression predictions on the data set created using these lines of code: x <- seq(0.1, 5, by = 0.05) # the observed feature y <- log(x) + rnorm(x, sd = 0.2) # the target for the observed feature Next try various kernels and added features with SVM. Can you improve the model by adding an extra feature which might be a function of the first feature? Compare both lm.ridge and svm. Which method produced a better model? (don’t forget to tune your models)

data(iris)
attach(iris)

setwd("C:/Users/Manjari/Desktop/Machine learning/Home Work Solutions")
# Formula interface: classify Species from the four measurements.
model <- svm(Species ~ ., data = iris)

# x/y interface: refit from an explicit predictor frame and response (this
# overwrites the formula fit above). The response is read from iris directly
# instead of through the attached search path, where any same-named object
# defined earlier would silently take precedence.
x <- subset(iris, select = -Species)
y <- iris$Species
model <- svm(x, y)
print(model)

## 
## Call:
## svm.default(x = x, y = y)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
##       gamma:  0.25 
## 
## Number of Support Vectors:  51

summary(model)

## 
## Call:
## svm.default(x = x, y = y)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
##       gamma:  0.25 
## 
## Number of Support Vectors:  51
## 
##  ( 8 22 21 )
## 
## 
## Number of Classes:  3 
## 
## Levels: 
##  setosa versicolor virginica

pred <- predict(model, x)
pred <- fitted(model)
table(pred, y)

##             y
## pred         setosa versicolor virginica
##   setosa         50          0         0
##   versicolor      0         48         2
##   virginica       0          2        48

pred <- predict(model, x, decision.values = TRUE)
attr(pred, "decision.values")[1:4,]

##   setosa/versicolor setosa/virginica versicolor/virginica
## 1          1.196152         1.091757            0.6708810
## 2          1.064621         1.056185            0.8483518
## 3          1.180842         1.074542            0.6439798
## 4          1.110699         1.053012            0.6782041

plot(cmdscale(dist(iris[,-5])),col = as.integer(iris[,5]),pch = c("o","+")[1:150 %in% model$index + 1])

x <- seq(0.1, 5, by = 0.05)
y <- log(x) + rnorm(x, sd = 0.2)
m <- svm(x, y)
new <- predict(m, x)
plot(x, y)
points(x, log(x), col = 2)
points(x, new, col = 4)

# Novelty-detection example on a two-dimensional standard-normal cloud: with
# no response in the formula, the fitted model labels points as typical
# (TRUE) or not (FALSE).
X <- data.frame(a = rnorm(1000), b = rnorm(1000))

# Pass the data frame explicitly instead of attach()-ing it: the formula then
# cannot pick up unrelated objects named a or b, and nothing is left on the
# search path. The earlier svm(X, gamma = 0.1) fit was overwritten
# immediately, so only the formula fit is kept.
m <- svm(~ a + b, data = X, gamma = 0.1)

# One point at the centre of the cloud and one far outside it.
newdata <- data.frame(a = c(0, 4), b = c(0, 4))
predict(m, newdata)

##     1     2 
##  TRUE FALSE

plot(X, col = 1:1000 %in% m$index + 1, xlim = c(-5,5), ylim=c(-5,5))
points(newdata, pch = "+", col = 2, cex = 5)

i2 <- iris
levels(i2$Species)[3] <- "versicolor"
summary(i2$Species)

##     setosa versicolor 
##         50        100

wts <- 100 / table(i2$Species)
wts

## 
##     setosa versicolor 
##          2          1

m <- svm(Species ~ ., data = i2, class.weights = wts)
model <- svm(Species ~ ., data = iris, gamma = 0.5, cost = 4)
print(model)

## 
## Call:
## svm(formula = Species ~ ., data = iris, gamma = 0.5, cost = 4)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  4 
##       gamma:  0.5 
## 
## Number of Support Vectors:  49

summary(model)

## 
## Call:
## svm(formula = Species ~ ., data = iris, gamma = 0.5, cost = 4)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  4 
##       gamma:  0.5 
## 
## Number of Support Vectors:  49
## 
##  ( 11 18 20 )
## 
## 
## Number of Classes:  3 
## 
## Levels: 
##  setosa versicolor virginica

# Resubstitution check for the gamma = 0.5, cost = 4 model. The response is
# taken from iris directly rather than from the attached search path, so the
# comparison cannot be hijacked by another object named Species.
x <- subset(iris, select = -Species)
y <- iris$Species
pred <- predict(model, x)
table(pred, y)

##             y
## pred         setosa versicolor virginica
##   setosa         50          0         0
##   versicolor      0         48         1
##   virginica       0          2        49

obj <- tune(svm, Species~., data = iris,ranges = list(gamma = seq(.5,1.5,0.1), cost = seq(7,9,0.5)),tunecontrol = tune.control(sampling = "cross"))
summary(obj)

## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  gamma cost
##    0.5    7
## 
## - best performance: 0.06 
## 
## - Detailed performance results:
##    gamma cost      error dispersion
## 1    0.5  7.0 0.06000000 0.07981460
## 2    0.6  7.0 0.06000000 0.07981460
## 3    0.7  7.0 0.06000000 0.07981460
## 4    0.8  7.0 0.06000000 0.07981460
## 5    0.9  7.0 0.06000000 0.07981460
## 6    1.0  7.0 0.06000000 0.07981460
## 7    1.1  7.0 0.06000000 0.07981460
## 8    1.2  7.0 0.06000000 0.07981460
## 9    1.3  7.0 0.06666667 0.07698004
## 10   1.4  7.0 0.06000000 0.07981460
## 11   1.5  7.0 0.06000000 0.07981460
## 12   0.5  7.5 0.06000000 0.07981460
## 13   0.6  7.5 0.06000000 0.07981460
## 14   0.7  7.5 0.06000000 0.07981460
## 15   0.8  7.5 0.06000000 0.07981460
## 16   0.9  7.5 0.06000000 0.07981460
## 17   1.0  7.5 0.06000000 0.07981460
## 18   1.1  7.5 0.06666667 0.07698004
## 19   1.2  7.5 0.06666667 0.07698004
## 20   1.3  7.5 0.06666667 0.07698004
## 21   1.4  7.5 0.06000000 0.07981460
## 22   1.5  7.5 0.06000000 0.07981460
## 23   0.5  8.0 0.06000000 0.07981460
## 24   0.6  8.0 0.06000000 0.07981460
## 25   0.7  8.0 0.06000000 0.07981460
## 26   0.8  8.0 0.06000000 0.07981460
## 27   0.9  8.0 0.06000000 0.07981460
## 28   1.0  8.0 0.06000000 0.07981460
## 29   1.1  8.0 0.06666667 0.07698004
## 30   1.2  8.0 0.06666667 0.07698004
## 31   1.3  8.0 0.06666667 0.07698004
## 32   1.4  8.0 0.06000000 0.07981460
## 33   1.5  8.0 0.06000000 0.07981460
## 34   0.5  8.5 0.06000000 0.07981460
## 35   0.6  8.5 0.06000000 0.07981460
## 36   0.7  8.5 0.06000000 0.07981460
## 37   0.8  8.5 0.06000000 0.07981460
## 38   0.9  8.5 0.06000000 0.07981460
## 39   1.0  8.5 0.06666667 0.07698004
## 40   1.1  8.5 0.06666667 0.07698004
## 41   1.2  8.5 0.06666667 0.07698004
## 42   1.3  8.5 0.06666667 0.07698004
## 43   1.4  8.5 0.06000000 0.07981460
## 44   1.5  8.5 0.06000000 0.07981460
## 45   0.5  9.0 0.06000000 0.07981460
## 46   0.6  9.0 0.06000000 0.07981460
## 47   0.7  9.0 0.06000000 0.07981460
## 48   0.8  9.0 0.06000000 0.07981460
## 49   0.9  9.0 0.06000000 0.07981460
## 50   1.0  9.0 0.06000000 0.07981460
## 51   1.1  9.0 0.06666667 0.07698004
## 52   1.2  9.0 0.06666667 0.07698004
## 53   1.3  9.0 0.06666667 0.07698004
## 54   1.4  9.0 0.06000000 0.07981460
## 55   1.5  9.0 0.06000000 0.07981460

plot(obj)

obj$best.parameters

##   gamma cost
## 1   0.5    7

model <- svm(Species ~ ., data = iris, gamma = 1.0, cost = 8)
print(model)

## 
## Call:
## svm(formula = Species ~ ., data = iris, gamma = 1, cost = 8)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  8 
##       gamma:  1 
## 
## Number of Support Vectors:  55

summary(model)

## 
## Call:
## svm(formula = Species ~ ., data = iris, gamma = 1, cost = 8)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  8 
##       gamma:  1 
## 
## Number of Support Vectors:  55
## 
##  ( 13 20 22 )
## 
## 
## Number of Classes:  3 
## 
## Levels: 
##  setosa versicolor virginica

obj$best.parameters

##   gamma cost
## 1   0.5    7

model$index

##  [1]  14  15  16  19  21  23  26  32  33  37  42  44  45  51  54  58  60
## [18]  61  63  64  65  68  69  71  73  77  78  79  80  84  85  86  99 101
## [35] 107 109 110 111 115 118 119 120 124 126 128 130 132 134 135 136 139
## [52] 142 147 149 150

pred <- predict(model, x)
table(pred, y)

##             y
## pred         setosa versicolor virginica
##   setosa         50          0         0
##   versicolor      0         49         0
##   virginica       0          1        50

# Training-set accuracy computed from the predictions rather than typed in
# from the confusion table: SVM correctly classifies 99.3% of Iris Data.
mean(pred == iris[, 5])

## [1] 0.9933333

# Row index of each misclassified flower (row 84 for this model). Compare
# with != instead of `== F`: F is an ordinary variable that can be reassigned.
which(pred != iris[, 5])

## [1] 84

# Larger gamma and cost, scored on the same rows the model was trained on.
model <- svm(Species ~ ., data = iris, gamma = 2.0, cost = 16)
pred <- predict(model, x)
# Indices of misclassified rows; != replaces the reassignable `== F` idiom.
which(pred != iris[, 5])

## integer(0)

table(pred, y)

##             y
## pred         setosa versicolor virginica
##   setosa         50          0         0
##   versicolor      0         50         0
##   virginica       0          0        50

# Another setting, again scored on the training rows.
model <- svm(Species ~ ., data = iris, gamma = 3.0, cost = 4)
pred <- predict(model, x)
# Indices of misclassified rows; != replaces the reassignable `== F` idiom.
which(pred != iris[, 5])  # this also gave NO errors

## integer(0)

table(pred, y)

##             y
## pred         setosa versicolor virginica
##   setosa         50          0         0
##   versicolor      0         50         0
##   virginica       0          0        50

data(Glass, package = "mlbench")
str(Glass)

## 'data.frame':    214 obs. of  10 variables:
##  $ RI  : num  1.52 1.52 1.52 1.52 1.52 ...
##  $ Na  : num  13.6 13.9 13.5 13.2 13.3 ...
##  $ Mg  : num  4.49 3.6 3.55 3.69 3.62 3.61 3.6 3.61 3.58 3.6 ...
##  $ Al  : num  1.1 1.36 1.54 1.29 1.24 1.62 1.14 1.05 1.37 1.36 ...
##  $ Si  : num  71.8 72.7 73 72.6 73.1 ...
##  $ K   : num  0.06 0.48 0.39 0.57 0.55 0.64 0.58 0.57 0.56 0.57 ...
##  $ Ca  : num  8.75 7.83 7.78 8.22 8.07 8.07 8.17 8.24 8.3 8.4 ...
##  $ Ba  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Fe  : num  0 0 0 0 0 0.26 0 0 0 0.11 ...
##  $ Type: Factor w/ 6 levels "1","2","3","5",..: 1 1 1 1 1 1 1 1 1 1 ...

Glass$Type

##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [36] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [71] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [106] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [141] 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 5 5 5 5 5 5 5 5 5 5 5 5
## [176] 5 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
## [211] 7 7 7 7
## Levels: 1 2 3 5 6 7

# Same seeded one-third hold-out split as used for the Glass data earlier.
set.seed(pi)
index <- seq_len(nrow(Glass))
testindex <- sample(index, size = trunc(length(index) / 3))

held_out <- index %in% testindex
testset <- Glass[testindex, ]
trainset <- Glass[!held_out, ]

# Radial SVM with a large cost; evaluated on the held-out rows only.
svm.model <- svm(Type ~ ., data = trainset, cost = 100, gamma = 1)
svm.pred <- predict(svm.model, testset[, names(testset) != "Type"])
table(pred = svm.pred, true = testset[["Type"]])

##     true
## pred  1  2  3  5  6  7
##    1 18  7  1  0  0  0
##    2  5 18  0  5  0  2
##    3  2  1  2  0  0  0
##    5  0  0  0  2  0  0
##    6  0  0  0  0  1  0
##    7  0  0  0  0  0  7

length(testset[,10])

## [1] 71

1-sum(svm.pred == testset[,10])/length(testset[,10])

## [1] 0.3239437

rpart.model <- rpart(Type ~ ., data = trainset)
rpart.pred <- predict(rpart.model, testset[, -10], type = "class")
table(pred = rpart.pred, true = testset[, 10])

##     true
## pred  1  2  3  5  6  7
##    1 20  3  0  0  0  0
##    2  4 19  2  1  1  0
##    3  1  3  1  0  0  0
##    5  0  1  0  6  0  0
##    6  0  0  0  0  0  0
##    7  0  0  0  0  0  9

1-sum(rpart.pred == testset[,10])/length(testset[,10])

## [1] 0.2253521

obj <- tune(svm, Type~., data = trainset,ranges = list(gamma = 2^(-4:0), cost = 2^(2:4)),tunecontrol = tune.control(sampling = "cross"))
summary(obj)

## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##   gamma cost
##  0.0625   16
## 
## - best performance: 0.3142857 
## 
## - Detailed performance results:
##     gamma cost     error dispersion
## 1  0.0625    4 0.3333333 0.10830935
## 2  0.1250    4 0.3342857 0.07165045
## 3  0.2500    4 0.3490476 0.06044473
## 4  0.5000    4 0.3485714 0.10953071
## 5  1.0000    4 0.3557143 0.12433851
## 6  0.0625    8 0.3209524 0.07148851
## 7  0.1250    8 0.3633333 0.06834803
## 8  0.2500    8 0.3566667 0.09543498
## 9  0.5000    8 0.3628571 0.11715533
## 10 1.0000    8 0.3557143 0.12433851
## 11 0.0625   16 0.3142857 0.08571429
## 12 0.1250   16 0.3357143 0.09718253
## 13 0.2500   16 0.3566667 0.09091927
## 14 0.5000   16 0.3490476 0.11584148
## 15 1.0000   16 0.3633333 0.11097611

plot(obj)

obj$best.parameters

##     gamma cost
## 11 0.0625   16

svm.model <- svm(Type ~ ., data = trainset, cost = 8, gamma = 0.25)
svm.pred <- predict(svm.model, testset[, -10])
table(pred = svm.pred, true = testset[, 10])

##     true
## pred  1  2  3  5  6  7
##    1 22  9  1  0  0  0
##    2  2 15  2  3  0  1
##    3  1  1  0  0  0  0
##    5  0  0  0  4  0  0
##    6  0  1  0  0  1  0
##    7  0  0  0  0  0  8

length(testset[,10])

## [1] 71

1-sum(svm.pred == testset[,10])/length(testset[,10])

## [1] 0.2957746

(6+3+1+2+4+4+1)/71 # = 0.2957746

## [1] 0.2957746

obj <- tune(svm, Type~., data = trainset,ranges = list(gamma = seq(0.1,0.15,0.01), cost = seq(15,17,1)),tunecontrol = tune.control(sampling = "cross"))
summary(obj)

## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  gamma cost
##   0.13   16
## 
## - best performance: 0.2728571 
## 
## - Detailed performance results:
##    gamma cost     error dispersion
## 1   0.10   15 0.2800000  0.1174775
## 2   0.11   15 0.2938095  0.1240342
## 3   0.12   15 0.2866667  0.1265827
## 4   0.13   15 0.2733333  0.1189566
## 5   0.14   15 0.2800000  0.1125485
## 6   0.15   15 0.3014286  0.1280521
## 7   0.10   16 0.2800000  0.1174775
## 8   0.11   16 0.2938095  0.1240342
## 9   0.12   16 0.2866667  0.1265827
## 10  0.13   16 0.2728571  0.1143970
## 11  0.14   16 0.2800000  0.1125485
## 12  0.15   16 0.3014286  0.1280521
## 13  0.10   17 0.2938095  0.1240342
## 14  0.11   17 0.2938095  0.1240342
## 15  0.12   17 0.2800000  0.1256036
## 16  0.13   17 0.2728571  0.1143970
## 17  0.14   17 0.2800000  0.1125485
## 18  0.15   17 0.3014286  0.1280521

plot(obj)

obj$best.parameters

##    gamma cost
## 10  0.13   16

svm.model <- svm(Type ~ ., data = trainset, cost = 16, gamma = 0.13)
svm.pred <- predict(svm.model, testset[, -10])
table(pred = svm.pred, true = testset[, 10])

##     true
## pred  1  2  3  5  6  7
##    1 22  9  1  0  0  0
##    2  2 15  1  3  0  0
##    3  1  1  1  0  0  0
##    5  0  0  0  4  0  0
##    6  0  1  0  0  1  1
##    7  0  0  0  0  0  8

length(testset[,10])

## [1] 71

1-sum(svm.pred == testset[,10])/length(testset[,10])

## [1] 0.2816901

(9+1+2+1+3+1+1+1+1)/length(testset[,10]) #[1] 0.2816901

## [1] 0.2816901

obj <- tune(svm, Type~., data = trainset,ranges = list(gamma = seq(0.1,0.15,0.01), cost = seq(15,17,1)),tunecontrol = tune.control(sampling = "cross"))
summary(obj)

## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  gamma cost
##   0.13   17
## 
## - best performance: 0.2657143 
## 
## - Detailed performance results:
##    gamma cost     error dispersion
## 1   0.10   15 0.2938095  0.1466009
## 2   0.11   15 0.3076190  0.1325411
## 3   0.12   15 0.3076190  0.1325411
## 4   0.13   15 0.2866667  0.1331972
## 5   0.14   15 0.2800000  0.1435398
## 6   0.15   15 0.2942857  0.1265827
## 7   0.10   16 0.2866667  0.1409359
## 8   0.11   16 0.3076190  0.1325411
## 9   0.12   16 0.3076190  0.1325411
## 10  0.13   16 0.2800000  0.1395346
## 11  0.14   16 0.2800000  0.1435398
## 12  0.15   16 0.2942857  0.1265827
## 13  0.10   17 0.2938095  0.1302178
## 14  0.11   17 0.3076190  0.1325411
## 15  0.12   17 0.3076190  0.1325411
## 16  0.13   17 0.2657143  0.1608243
## 17  0.14   17 0.2800000  0.1435398
## 18  0.15   17 0.3009524  0.1186724

plot(obj)

obj$best.parameters

##    gamma cost
## 16  0.13   17

svm.model <- svm(Type ~ ., data = trainset, cost = 16, gamma = 0.13)
svm.pred <- predict(svm.model, testset[, -10])
table(pred = svm.pred, true = testset[, 10])

##     true
## pred  1  2  3  5  6  7
##    1 22  9  1  0  0  0
##    2  2 15  1  3  0  0
##    3  1  1  1  0  0  0
##    5  0  0  0  4  0  0
##    6  0  1  0  0  1  1
##    7  0  0  0  0  0  8

length(testset[,10])

## [1] 71

1-sum(svm.pred == testset[,10])/length(testset[,10])

## [1] 0.2816901

(9+1+2+1+3+1+1+1+1)/length(testset[,10]) #[1] 0.2816901

## [1] 0.2816901

# Synthetic regression problem: a noisy logarithm sampled on an even grid.
x <- seq(from = 0.1, to = 5, by = 0.05) # the observed feature
noise <- rnorm(length(x), sd = 0.2)
y <- log(x) + noise                     # the target for the observed feature

# Fit svm() with all defaults on the single feature and predict the same
# grid points back.
m <- svm(x, y)
new <- predict(m, x)

# Observations in black, the noise-free curve in red, the SVM fit in blue.
plot(x, y, col = 1)
points(x, log(x), col = 2)
points(x, new, col = 4)
legend(
  x = 3, y = -1,
  legend = c("actual y", "log(x)", "predicted"),
  col = c(1, 2, 4),
  pch = 1
)

Homework#6

Manjari

Wednesday, March 18, 2015