rm(list = ls())
library(class)
library(e1071)
winequality <- read.csv("/home/archana/ML_works_ucsc/winequality-red.csv", header = TRUE, sep = ";")
# Split: first 1400 rows for training, last 199 held out; column 12 is quality
train.labels <- winequality[1:1400, 12]
train <- winequality[1:1400, -12]
test.labels <- winequality[1401:1599, 12]
test <- winequality[1401:1599, -12]
Err <- rep(0, 20)
# knn.cv performs leave-one-out cross-validation on the training rows
for (i in seq(from = 1, to = 20)) {
    out <- knn.cv(train, train.labels, k = i)
    Err[i] <- 1 - sum(train.labels == out)/length(out)
}
Err
## [1] 0.3807 0.4550 0.4621 0.4664 0.4821 0.4979 0.4836 0.4950 0.5007 0.4943
## [11] 0.4964 0.5079 0.5043 0.4957 0.4821 0.4771 0.4757 0.4843 0.4764 0.4686
plot(Err)
bestk <- which.min(Err)
bestk
## [1] 1
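The test split created above is never scored in the code; as a quick sanity check, the selected k could be evaluated on those held-out rows (a sketch using knn() from the class package and the variables defined above):
pred <- knn(train, test, train.labels, k = bestk)
1 - sum(test.labels == pred)/length(pred)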
The best k for k-nearest-neighbors is 1, which corresponds to a leave-one-out error of 0.38.
#2) Use k-nearest neighbors to classify the Iris data set. Compare the k-nearest-neighbor results with the results obtained in class using the Naive Bayes Classifier.
rm(list = ls())
library(class)
library(e1071)
library(klaR)
## Loading required package: MASS
library(MASS)
train.labels <- iris[1:130, 5]
train <- iris[1:130, -5]
test.labels <- iris[131:150, 5]
test <- iris[131:150, -5]
Err <- rep(0, 20)
# Same leave-one-out scan over k for the iris training rows
for (kk in seq(from = 1, to = 20)) {
    out <- knn.cv(train, train.labels, k = kk)
    Err[kk] <- 1 - sum(train.labels == out)/length(out)
}
Err
## [1] 0.03846 0.03077 0.03077 0.03846 0.04615 0.03846 0.04615 0.04615
## [9] 0.04615 0.03846 0.04615 0.04615 0.05385 0.05385 0.05385 0.05385
## [17] 0.06154 0.05385 0.06154 0.05385
plot(Err)
bestk <- which.min(Err)
bestk
## [1] 2
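Because iris is ordered by species, the held-out rows 131:150 are all virginica; even so, they can sanity-check the chosen k (a sketch reusing the variables above):
pred <- knn(train, test, train.labels, k = bestk)
1 - sum(test.labels == pred)/length(pred)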
data(iris)
m <- NaiveBayes(Species ~ ., data = iris)
# predict() with no newdata scores the training data (resubstitution error)
out <- predict(m)
Err <- 1 - sum(out$class == iris$Species)/length(iris$Species)
Err
## [1] 0.04
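A confusion table shows where the 0.04 comes from (a sketch; out is the prediction object from above):
table(out$class, iris$Species)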
#3) Classify the wine quality data using Naive Bayes. Compare the results with the two methods described in problem 1 of this homework set. Think about why one of the methods used works better than the other.
winequality <- read.csv("/home/archana/ML_works_ucsc/winequality-red.csv", header = TRUE, sep = ";")
winequality$quality <- as.factor(winequality$quality)
m <- NaiveBayes(quality ~ ., data = winequality)
out <- predict(m)  # resubstitution again: scored on the training data
Err <- 1 - sum(out$class == winequality$quality)/length(winequality$quality)
Err
## [1] 0.4396
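A confusion table can show which quality levels Naive Bayes confuses most (a sketch using the fitted objects above):
table(predicted = out$class, actual = winequality$quality)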
For problem 2, k-nearest-neighbors with k = 2 gives a lower error (0.031) than Naive Bayes (0.040), so kNN edges out Naive Bayes for classifying species in the Iris data.
For problem 3, Naive Bayes does worse, with an error of 0.4396, compared to k-nearest-neighbors with k = 1, which had an error of 0.38.
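Note that the two figures in problem 3 are not measured the same way: the kNN number is leave-one-out cross-validation, while the Naive Bayes number is resubstitution error. A more even-handed comparison would score both on the held-out split from problem 1; a sketch (m2, wq.train, and wq.test are illustrative names, and NaiveBayes is refit on the first 1400 rows only):
wq.train <- winequality[1:1400, ]
wq.test <- winequality[1401:1599, ]
m2 <- NaiveBayes(quality ~ ., data = wq.train)
1 - sum(predict(m2, wq.test[, -12])$class == wq.test$quality)/nrow(wq.test)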
rm(list = ls())
library(class)
library(e1071)
library(klaR)
library(MASS)
library(rpart)
sonar.train <- read.csv("/home/archana/ML_works_ucsc/sonar_train.csv", header = FALSE)
sonar.train$V61 <- as.factor(sonar.train$V61)
m <- NaiveBayes(V61 ~ ., data = sonar.train)
out <- predict(m)
Err <- 1 - sum(out$class == sonar.train$V61)/length(sonar.train$V61)
Err
## [1] 0.2385
train.labels <- sonar.train$V61
train <- sonar.train[, -61]
Err <- rep(0, 20)
# Leave-one-out scan over k on the sonar training set
for (kk in seq(from = 1, to = 20)) {
    out <- knn.cv(train, train.labels, k = kk)
    Err[kk] <- 1 - sum(train.labels == out)/length(out)
}
Err
## [1] 0.2000 0.2538 0.2615 0.2923 0.3000 0.3000 0.3000 0.2769 0.2923 0.2846
## [11] 0.3000 0.3154 0.3154 0.3077 0.3077 0.3077 0.3077 0.3231 0.3231 0.3308
plot(Err)
bestk <- which.min(Err)
bestk
## [1] 1
k-nearest-neighbors with k = 1 does best, with a leave-one-out error of 0.2, versus Naive Bayes, which also performs well with a (resubstitution) error of 0.2385. The relative costs of misclassification and the ease of computation on the dataset would determine which method is 'best' for classifying the sonar data set.
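If a matching held-out file exists, the two methods could also be compared on unseen data; a sketch (the sonar_test.csv path is an assumption mirroring the training file, not part of the original run):
# Hypothetical test file; path mirrors sonar_train.csv above
sonar.test <- read.csv("/home/archana/ML_works_ucsc/sonar_test.csv", header = FALSE)
truth <- as.character(sonar.test$V61)
nb.pred <- predict(m, sonar.test[, -61])$class
knn.pred <- knn(train, sonar.test[, -61], train.labels, k = bestk)
# Compare as character to avoid factor-level mismatches
c(NaiveBayes = mean(as.character(nb.pred) != truth),
  kNN = mean(as.character(knn.pred) != truth))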