1(a) Once again check out the wine quality data set described in the web page below: http://archive.ics.uci.edu/ml/machine-learning-databases/winequality/winequality.names Remember that the Red Wine data set (winequality-red.csv) contains 1599 observations of 11 attributes; the median score of the wine tasters is given in the last column. Note also that the delimiter used in this file is a semicolon and not a comma. This problem is to create an ordinary least squares linear model (use the lm function in R) for this data set using the first 1400 observations. Don't forget to scale each column before you create the model. Next, check the model's performance on the last 199 observations. How well did the model predict the results for the last 199 observations? What measure did you use to evaluate how well the model did this prediction? Next, use the model to predict the results for the whole data set and measure how well your model worked. (Hint: use the R function lm and the regression example from class.)
#1(a)
setwd("C:/Users/Manjari/Desktop/Machine learning/Home Work Solutions")
wine_data <- read.table("winequality-red.csv", header=TRUE, sep=";")  # semicolon-delimited
wine_train = wine_data[1:1400,]
wine_test = wine_data[1401:1599,]
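The prompt asks for each column to be scaled before the model is built, while the run recorded below fits on the raw columns. A minimal sketch of that step (an assumed addition, not part of the recorded run; the quality score is left unscaled) could be:
# hypothetical scaling step: standardize the 11 predictor columns only
wine_scaled <- wine_data
wine_scaled[, 1:11] <- scale(wine_data[, 1:11])
# the same 1400/199 split and lm() fit would then be run on wine_scaled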
# root-mean-square error between predictions and observed values
rmse = function(X, Y){
  return( sqrt(sum((X-Y)^2)/length(Y)) )
}
y_train = wine_train[,12]
lmodel = lm(quality~., data = wine_train)
wine_train_err = rmse(y_train, predict(lmodel, newdata = wine_train[,-12]))
y_test = wine_test[,12]
wine_test_err = rmse(y_test, predict(lmodel, newdata = wine_test[,-12]))
cat("training error: ", wine_train_err, ", test error: ", wine_test_err, "\n")
## training error: 0.638642 , test error: 0.6977601
#Q1b
y_whole = c(y_train, y_test)
wine_whole_err = rmse(y_whole,
predict(lmodel, newdata = rbind(wine_train[,-12], wine_test[,-12])))
cat("training error: ", wine_train_err, ", test error: ", wine_test_err,
", whole data error: ", wine_whole_err, "\n")
## training error: 0.638642 , test error: 0.6977601 , whole data error: 0.6462941
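RMSE is the measure used throughout; a complementary check (a sketch using the objects already computed above) is the R-squared on the 199 held-out rows:
# hypothetical complementary measure: R^2 on the held-out 199 observations
pred_test <- predict(lmodel, newdata = wine_test[,-12])
r2_test <- 1 - sum((y_test - pred_test)^2)/sum((y_test - mean(y_test))^2)
r2_test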
library("ridge")
# grid of 30 ridge penalties, from 10^3 down to 10^-2.8
xlambda = rep(0, times = 30)
for(i in seq(from = 0, to = 29)){
  exp <- (+3 - 4*(i/20))
  xlambda[i+1] <- 10^exp
}
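The loop above builds the 30 penalties by stepping the exponent down from 3 in increments of 0.2; the same grid can be written in one vectorized line:
# equivalent vectorized construction of the lambda grid
xlambda <- 10^(3 - 4*(0:29)/20)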
wine_train_err_lr = rep(0, times = length(xlambda))
wine_test_err_lr = rep(0, times = length(xlambda))
min_lambda = 10000
for(i in 1:length(xlambda)){
  lrmodel = linearRidge(quality~., data = wine_train, lambda = xlambda[i])
  wine_train_err_lr[i] = rmse(y_train, predict(lrmodel, newdata = wine_train[,-12]))
  wine_test_err_lr[i] = rmse(y_test, predict(lrmodel, newdata = wine_test[,-12]))
  # record the latest lambda whose test RMSE improves on the previous grid point by more than 0.005
  if(i > 1 && wine_test_err_lr[i] < (wine_test_err_lr[i-1] - 0.005)){
    min_lambda = xlambda[i]
    index_lambda = i
  }
}
plot(1:length(xlambda),wine_train_err_lr,
ylim=c(min(wine_train_err_lr, wine_test_err_lr),
max(wine_train_err_lr, wine_test_err_lr)))
points(1:length(xlambda),wine_test_err_lr, col='red')
cat(index_lambda, "th ", "lambda is optimal: ", min_lambda, "\n")
## 18 th lambda is optimal: 0.3981072
cat("ridge training error: ", wine_train_err_lr[xlambda==min_lambda],
", test error: ", wine_test_err_lr[xlambda==min_lambda], "\n")
## ridge training error: 0.6482447 , test error: 0.7092735
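The selection rule above keeps the latest grid point whose test RMSE improves on its predecessor by more than 0.005. A simpler alternative (a sketch using the vectors already computed, not the rule used for the reported result) is to take the grid point with the smallest test RMSE directly:
# hypothetical alternative: pick the lambda with the smallest test RMSE
best_i <- which.min(wine_test_err_lr)
cat("lambda", xlambda[best_i], "at grid position", best_i,
    "gives test RMSE", wine_test_err_lr[best_i], "\n")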
#Q2b
lrmodel2 = linearRidge(quality~., data = wine_train, lambda=min_lambda)
wine_train_err_lr2 = rmse(y_train, predict(lrmodel2, newdata = wine_train[,-12]))
wine_test_err_lr2 = rmse(y_test, predict(lrmodel2, newdata = wine_test[,-12]))
cat("non-ridge training error: ", wine_train_err, ", test error: ", wine_test_err, "\n")
## non-ridge training error: 0.638642 , test error: 0.6977601
cat("ridge training error: ", wine_train_err_lr2, ", test error: ", wine_test_err_lr2, "\n")
## ridge training error: 0.6482447 , test error: 0.7092735
lrmodel0 = linearRidge(quality~., data = wine_train, lambda=0)
# coefficients of the linear (lambda = 0) and ridge regression models
lrmodel0$coef
## [,1]
## fixed.acidity 1.5267106
## volatile.acidity -7.2101097
## citric.acid -1.0628381
## residual.sugar 0.3144794
## chlorides -3.3323779
## free.sulfur.dioxide 1.3092827
## total.sulfur.dioxide -4.2521873
## density -1.0535142
## pH -1.6969912
## sulphates 5.7144814
## alcohol 11.2483663
lrmodel2$coef
## [,1]
## fixed.acidity 1.7008522
## volatile.acidity -5.3273062
## citric.acid 1.2136676
## residual.sugar 0.5699825
## chlorides -2.5102442
## free.sulfur.dioxide 0.2720990
## total.sulfur.dioxide -3.0673511
## density -2.3773154
## pH -0.3890982
## sulphates 4.1571833
## alcohol 7.8186197
plot(abs(lrmodel0$coef))
points(abs(lrmodel2$coef),col="red")
#Q3a
library(MASS)
iris_data = read.table("irisdata.csv", sep=",", header=FALSE)
# we only need Versicolor and Virginica data.
iris_data = iris_data[51:150,]
target = rep(0,100)
target[iris_data[,5]=="Iris-versicolor"] = 1
target[iris_data[,5]!="Iris-versicolor"] = -1
row.names(iris_data)<-NULL
iris_data = cbind(iris_data, target)
# wrapper around set.seed() so the shuffle below is reproducible
set_seed <- function(i) {
  set.seed(i)
}
k = 10
# 10-fold cross validation is used.
num_sample = nrow(iris_data)
set_seed(123)
iris_data = iris_data[sample(num_sample, num_sample, replace=FALSE),]
iris_train = iris_data[1:(num_sample*((k-1)/k)),]  # placeholder; rows are overwritten in the CV loop below
iris_cross = iris_data[1:(num_sample*(1/k)),]      # placeholder; reassigned in the CV loop below
error_train_total = matrix(0, nrow = length(xlambda), ncol = 1)
error_cross_total = matrix(0, nrow = length(xlambda), ncol = 1)
for(ilambda in 1:length(xlambda)){
  pick = k  # index of the fold held out; decremented after each repeat
  error_train = 0
  error_cross = 0
  for(j in 1:k){
    i_tmp = 1
    for(i in 1:k){
      # choose training set and cross-validation set
      if(i == pick){
        iris_cross = iris_data[((i-1)*num_sample/k+1):(num_sample*(i/k)),]
      } else {
        iris_train[((i_tmp-1)*num_sample/k+1):(num_sample*(i_tmp/k)),] =
          iris_data[((i-1)*num_sample/k+1):(num_sample*i/k),]
        i_tmp = i_tmp + 1
      }
    }
    pick = pick - 1
    y_iris_train = iris_train[,6]
    x_iris_train = iris_train[,1:4]
    yx_iris_train = cbind(x_iris_train, y_iris_train)
    y_iris_cross = iris_cross[,6]
    x_iris_cross = iris_cross[,1:4]
    iris_model = lm.ridge(y_iris_train~., yx_iris_train, lambda=xlambda[ilambda])
    A = as.array(iris_model$coef[1:4]/iris_model$scales)
    # centre the predictors with the means stored by lm.ridge, then predict
    X_train = x_iris_train
    for(i in seq(from = 1, to = ncol(x_iris_train))){
      X_train[,i] = x_iris_train[,i] - iris_model$xm[i]
    }
    X_train = as.matrix(X_train)
    yh = X_train %*% A + iris_model$ym
    yhP = (yh >= 0.0)
    yp = (y_iris_train >= 0.0)
    # misclassification rate, averaged over the k folds
    error_train = error_train + sum(yhP != yp)/(length(y_iris_train)*k)
    X_cross = x_iris_cross
    for(i in seq(from = 1, to = ncol(x_iris_cross))){
      X_cross[,i] = x_iris_cross[,i] - iris_model$xm[i]
    }
    X_cross = as.matrix(X_cross)
    yh = X_cross %*% A + iris_model$ym
    yhP = (yh >= 0.0)
    yp = (y_iris_cross >= 0.0)
    error_cross = error_cross + sum(yhP != yp)/(length(y_iris_cross)*k)
  }
  error_train_total[ilambda,1] = error_train
  error_cross_total[ilambda,1] = error_cross
}
min_iris_lambda <- xlambda[min(which(min(error_cross_total) == error_cross_total))]
th_lambda = min(which(min(error_cross_total) == error_cross_total))
cat(th_lambda, "th lambda", min_iris_lambda, "is optimal.")
## 9 th lambda 25.11886 is optimal.
plot(1:length(xlambda),error_train_total[,1],
ylim=c(min(error_train_total, error_cross_total),
max(error_train_total, error_cross_total)))
points(1:length(xlambda),error_cross_total[,1], col='red')
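The fold bookkeeping above rebuilds iris_train row by row on every pass. A more compact sketch of the same 10-fold search (an assumed alternative using the shuffled iris_data, xlambda, k, and MASS::lm.ridge already in scope; it mirrors the de-scaling and thresholding steps above but is not the code that produced the reported numbers):
# hypothetical compact rewrite: precompute a fold id per row, then index with it
fold_id <- rep(1:k, length.out = nrow(iris_data))
cv_err_alt <- sapply(xlambda, function(lam) {
  mean(sapply(1:k, function(f) {
    train <- iris_data[fold_id != f, ]
    cross <- iris_data[fold_id == f, ]
    fit <- lm.ridge(target ~ ., data = train[, c(1:4, 6)], lambda = lam)
    A <- as.array(fit$coef[1:4] / fit$scales)        # coefficients on the original scale
    Xc <- sweep(as.matrix(cross[, 1:4]), 2, fit$xm)  # centre with the stored column means
    pred <- Xc %*% A + fit$ym
    mean((pred >= 0) != (cross$target >= 0))         # misclassification rate on the held-out fold
  }))
})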
attach(iris)
library(glmnet)
## Loading required package: Matrix
## Loading required package: foreach
## Loaded glmnet 2.0-2
library(plyr)
oldpar <- par(no.readonly=TRUE)
names(iris)
## [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"
## [5] "Species"
dim(iris)
## [1] 150 5
str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
## data manipulation: use mutate to add the quadratic and interaction features
iris_test<-mutate(iris, X5 =Sepal.Length^2, X6= Sepal.Length* Sepal.Width,
X7=Sepal.Length*Petal.Length, X8=Sepal.Length*Petal.Width,
X9 =Sepal.Width^2, X10=Sepal.Width*Petal.Length,
X11=Sepal.Width*Petal.Width, X12= Petal.Length^2,
X13=Petal.Length*Petal.Width, X14=Petal.Width^2)
iris_data<-iris[51:150,]
iris_trans1<-iris_test[, -5]
iris_trans2<-iris_test[, 5]
iris_trans<-cbind(iris_trans1, iris_trans2)
iris_data<-iris_trans[51:150,]
Y1 <- c(rep(-1,100))
Y2 <- Y1
Y1[1:50] <- 1 #Y1 will be used to separate out Versicolor
Y2[51:100] <- 1 #Y2 is used to separate out Virginica
I <- 1:100
target <- cbind(Y1,Y2)
head(target)
## Y1 Y2
## [1,] 1 -1
## [2,] 1 -1
## [3,] 1 -1
## [4,] 1 -1
## [5,] 1 -1
## [6,] 1 -1
X <- iris_data[,-15]
## placeholders for the input and output attributes
x_trainIn1<-0.0
x_trainOut1<-0.0
x_trainIn2<-0.0
x_trainOut2<-0.0
nxval=5 ### for 5-fold cross validation
ntol=nrow(iris_data) ##number of rows
y_trainIn<-matrix(nrow = (ntol - ntol/nxval), ncol = 2)
y_trainOut<-matrix(nrow = ntol/nxval, ncol = 2)
testErr <- matrix(nrow = nxval, ncol = 2)
lambda <- matrix(nrow = nxval, ncol = 2)
prolambda <- c(rep(0,2)) # prolambda will hold the two production lambdas
for(ident in seq(1,2)){ # find the best lambda for each response (Y1 = versicolor, Y2 = virginica)
  grid = 10^seq(10, -2, length = 100)
  trainErr <- 0.0
  bestlam <- 0.0
  for(ixval in seq(from = 1, to = nxval)){
    Iout <- which(I %% nxval == ixval - 1)
    Xin <- X[-Iout,]
    Xout <- X[Iout,]
    Yin <- target[-Iout, ident] # run with Y1, Y2
    Yout <- target[Iout, ident] # run with Y1, Y2
    dataIn <- cbind(Xin, Yin)
    if(ident == 1){
      y_trainIn[, 1] <- target[-Iout, ident]
      y_trainOut[, 1] <- target[Iout, ident]
      x_trainIn1 <- X[-Iout,]
      x_trainOut1 <- X[Iout,]
    } else if(ident == 2){
      y_trainIn[, 2] <- target[-Iout, ident]
      y_trainOut[, 2] <- target[Iout, ident]
      x_trainIn2 <- X[-Iout,]
      x_trainOut2 <- X[Iout,]
    }
    ## get the best lambda by cross-validation within the training folds
    cv.out = cv.glmnet(as.matrix(Xin), as.matrix(Yin), alpha = 0, lambda = grid)
    bestlam = cv.out$lambda.min
    # make a fair comparison of error on the held-out fold
    ridgeMod = glmnet(as.matrix(Xin), as.matrix(Yin), alpha = 0, lambda = bestlam)
    yEst <- predict(ridgeMod, newx = as.matrix(Xout))
    testErr[ixval, ident] <- sqrt(sum((Yout - yEst)^2))/length(yEst)
    lambda[ixval, ident] <- bestlam
  } ## end of the 5-fold loop for this response
  testErr
  MintestErr <- apply(testErr, 2, min)
  MintestErr # 0.09550798 0.09550798
  index <- which(min(testErr[, ident]) == testErr[, ident])
  print(index)
  prolambda[ident] <- lambda[index, ident]
  ##prolambda1[2]<- lambda[index, 2]
}
## [1] 2
## [1] 2
prolambda
## [1] 0.01000000 0.07054802
library(ROCR)
## Loading required package: gplots
##
## Attaching package: 'gplots'
##
## The following object is masked from 'package:stats':
##
## lowess
#Create the production model
## appropriate lambda for Y1 (versicolor)
xlambda1 <- prolambda[1]
Xin1 <- x_trainIn1
Yin1 <-y_trainIn[, 1]
dataIn1 <- cbind(Xin1,Yin1)
ridgeMod1 =glmnet(as.matrix(Xin1), as.matrix(Yin1),alpha=0,lambda=xlambda1)
pred_y1 <- predict(ridgeMod1, newx = as.matrix(x_trainOut1))
testErr1 <-sqrt( sum((y_trainOut[, 1] - pred_y1)^2))/length(pred_y1)
testErr1 #[1] 0.1417557
## [1] 0.1417557
###
##comparing predicted and test values
pred1<-prediction(pred_y1, as.matrix(y_trainOut[, 1]))
###Get TPR and FPR values
perf1<-performance(pred1,"tpr","fpr")
##performance(pred1,"auc") #
### appropriate lambda for Y2 (virginica)
xlambda2 <- prolambda[2]
Xin2 <- x_trainIn2
Yin2 <- y_trainIn[, 2]
dataIn2 <- cbind(Xin2,Yin2)
ridgeMod2 =glmnet(as.matrix(Xin2), as.matrix(Yin2),alpha=0,lambda=xlambda2)
##
pred_y2 <- predict(ridgeMod2, newx = as.matrix(x_trainOut2))
##error
testErr2 <-sqrt( sum((y_trainOut[, 2] -pred_y2)^2))/length(pred_y2)
testErr2
## [1] 0.1424886
pred2<-prediction(pred_y2, as.matrix(y_trainOut[, 2]))
perf2<-performance(pred2,"tpr","fpr")
##performance(pred2,"auc") #
## ROCR returns S4 objects, so slots must be accessed with '@' rather than '$'.
## points() cannot take the performance object directly, so the x/y slots are
## converted via data.frame and then as.matrix before plotting.
x.values= data.frame(perf2@x.values)
y.values= data.frame(perf2@y.values)
plot(perf1, col="green", type = "l") # Versicolor ROC curve
points(as.matrix(x.values), as.matrix(y.values), col="blue", type = "p") # add Virginica ROC curve
abline(0,1)
legend("bottomright", c("Versicolor","Virginica"), cex = 0.6,
pch = c(20,20), col = c("green", "blue"))# lty = 1:2,
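The commented-out performance(pred, "auc") calls above can be turned into a single-number summary for each curve; a short sketch using the ROCR objects already built (slots accessed with '@' as noted above):
# area under each ROC curve
auc1 <- performance(pred1, "auc")@y.values[[1]]
auc2 <- performance(pred2, "auc")@y.values[[1]]
cat("AUC versicolor:", auc1, " AUC virginica:", auc2, "\n")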
## Q5
k = 10
glass_data = read.table("glass.txt", header=FALSE, sep=',')
glass_data = glass_data[1:163,]
row.names(glass_data)<-NULL
num_sample = nrow(glass_data)
glass_data = glass_data[sample(num_sample, num_sample, replace=FALSE),]  # note: no seed is set here, so results vary between runs
class1 = rep(0,163)
class2 = rep(0,163)
class3 = rep(0,163)
class1[glass_data[,11]==1] = 1
class1[glass_data[,11]!=1] = -1
class2[glass_data[,11]==2] = 1
class2[glass_data[,11]!=2] = -1
class3[glass_data[,11]==3] = 1
class3[glass_data[,11]!=3] = -1
glass_data = cbind(glass_data, class1, class2, class3)
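The three one-vs-rest targets above can also be written with ifelse; these one-liners are equivalent to the paired assignments:
# equivalent construction of the three one-vs-rest targets
class1 <- ifelse(glass_data[,11] == 1, 1, -1)
class2 <- ifelse(glass_data[,11] == 2, 1, -1)
class3 <- ifelse(glass_data[,11] == 3, 1, -1)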
min_glass_lambda=rep(1000,3)
glass_train = glass_data[1:round((num_sample*((k-1)/k))),]  # placeholder; rows are overwritten in the loop below
error_glass_train_total = matrix(0, nrow = length(xlambda), ncol = 3)
error_glass_cross_total = matrix(0, nrow = length(xlambda), ncol = 3)
th_lambda = c(0,0,0)
min_error = c(0,0,0)
for(iy in 1:3){
  for(ilambda in 1:length(xlambda)){
    pick = k  # index of the fold held out; decremented after each repeat
    error_train = 0
    error_cross = 0
    for(j in 1:k){
      i_tmp = 1
      for(i in 1:k){
        # choose training set and cross-validation set
        if(i == pick){
          glass_cross = glass_data[(round((i-1)*num_sample/k)+1):(round(num_sample*(i/k))),]
        } else {
          glass_train[((i_tmp-1)*num_sample/k+1):(num_sample*(i_tmp/k)),] =
            glass_data[((i-1)*num_sample/k+1):(num_sample*(i/k)),]
          i_tmp = i_tmp + 1
        }
      }
      pick = pick - 1
      y_glass_train = glass_train[,11+iy]
      x_glass_train = glass_train[,1:10]
      yx_glass_train = cbind(x_glass_train, y_glass_train)
      y_glass_cross = glass_cross[,11+iy]
      x_glass_cross = glass_cross[,1:10]
      glass_model = lm.ridge(y_glass_train~., yx_glass_train, lambda=xlambda[ilambda])
      A = as.array(glass_model$coef[1:10]/glass_model$scales)
      # centre the predictors with the means stored by lm.ridge, then predict
      X_train = x_glass_train
      for(i in seq(from = 1, to = ncol(x_glass_train))){
        X_train[,i] = x_glass_train[,i] - glass_model$xm[i]
      }
      X_train = as.matrix(X_train)
      yh = X_train %*% A + glass_model$ym
      yhP = (yh >= 0.0)
      yp = (y_glass_train >= 0.0)
      # misclassification rate, averaged over the k folds
      error_train = error_train + sum(yhP != yp)/(length(y_glass_train)*k)
      X_cross = x_glass_cross
      for(i in seq(from = 1, to = ncol(x_glass_cross))){
        X_cross[,i] = x_glass_cross[,i] - glass_model$xm[i]
      }
      X_cross = as.matrix(X_cross)
      yh = X_cross %*% A + glass_model$ym
      yhP = (yh >= 0.0)
      yp = (y_glass_cross >= 0.0)
      error_cross = error_cross + sum(yhP != yp)/(length(y_glass_cross)*k)
    }
    error_glass_train_total[ilambda,iy] = error_train
    error_glass_cross_total[ilambda,iy] = error_cross
  }
  min_glass_lambda[iy] <- xlambda[min(which(min(error_glass_cross_total[,iy]) ==
                                              error_glass_cross_total[,iy]))]
  th_lambda[iy] = min(which(min(error_glass_cross_total[,iy]) == error_glass_cross_total[,iy]))
}
sprintf("minimum classification error are %f, %f, %f",
error_glass_cross_total[5,1],
error_glass_cross_total[11,2],
error_glass_cross_total[13,3])
## [1] "minimum classification error are 0.018382, 0.171691, 0.091912"
## [1] "minimum classification error are 0.018382, 0.165441, 0.085662"
plot(1:length(xlambda),error_glass_train_total[,1],
ylim=c(min(error_glass_train_total, error_glass_cross_total),
max(error_glass_train_total, error_glass_cross_total)))
points(1:length(xlambda),error_glass_cross_total[,1], col='red')
plot(1:length(xlambda),error_glass_train_total[,2],
ylim=c(min(error_glass_train_total, error_glass_cross_total),
max(error_glass_train_total, error_glass_cross_total)))
points(1:length(xlambda),error_glass_cross_total[,2], col='red')
plot(1:length(xlambda),error_glass_train_total[,3],
ylim=c(min(error_glass_train_total, error_glass_cross_total),
max(error_glass_train_total, error_glass_cross_total)))
points(1:length(xlambda),error_glass_cross_total[,3], col='red')