UCSC Machine Learning - Homework 3 (About Ordinary Regression & Ridge Regression)

R packages used in this assignment:

rpart ridge MASS glmnet ROCR dplyr

Question 1: This question uses the following ages for a set of trees:

Once again check out wine quality data set described in the web page below: http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality.names Remember the Red Wine data set (winequality-red.csv) contains 1599 observations of 11 attributes. The median score of the wine tasters is given in the last column. Note also that the delimiter used in this file is a semi colon and not a comma. This problem is to create an ordinary least squares linear model (use the lm function in R) for this data set using the first 1400 observations. Don’t forget to scale each column before you create the model. Next check the model’s performance on the last 199 observations. How well did the model predict the results for the last 199 observations? What measure did you use to evaluate how well the model did this prediction? Next use the model to predict the results for the whole data set and measure how well your model worked. (hint: use the r function lm and the regression example from class)

## Some house keep jobs: 
# Set working directory:
rm(list=ls())
require(rpart)

## Loading required package: rpart

train<-read.csv("C:/Users/Andrew/SkyDrive/AGZ_Home/workspace_R/UCSC/MachinLearning/All_data/winequality-red.txt",sep=";",header=TRUE)
head(train)

##   fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1           7.4             0.70        0.00            1.9     0.076
## 2           7.8             0.88        0.00            2.6     0.098
## 3           7.8             0.76        0.04            2.3     0.092
## 4          11.2             0.28        0.56            1.9     0.075
## 5           7.4             0.70        0.00            1.9     0.076
## 6           7.4             0.66        0.00            1.8     0.075
##   free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
## 1                  11                   34  0.9978 3.51      0.56     9.4
## 2                  25                   67  0.9968 3.20      0.68     9.8
## 3                  15                   54  0.9970 3.26      0.65     9.8
## 4                  17                   60  0.9980 3.16      0.58     9.8
## 5                  11                   34  0.9978 3.51      0.56     9.4
## 6                  13                   40  0.9978 3.51      0.56     9.4
##   quality
## 1       5
## 2       5
## 3       5
## 4       6
## 5       5
## 6       5

dim(train)

## [1] 1599   12

data_scaled<-as.data.frame(scale(train[,]))
head(data_scaled)

##   fixed.acidity volatile.acidity citric.acid residual.sugar   chlorides
## 1    -0.5281944        0.9615758   -1.391037    -0.45307667 -0.24363047
## 2    -0.2984541        1.9668271   -1.391037     0.04340257  0.22380518
## 3    -0.2984541        1.2966596   -1.185699    -0.16937425  0.09632273
## 4     1.6543385       -1.3840105    1.483689    -0.45307667 -0.26487754
## 5    -0.5281944        0.9615758   -1.391037    -0.45307667 -0.24363047
## 6    -0.5281944        0.7381867   -1.391037    -0.52400227 -0.26487754
##   free.sulfur.dioxide total.sulfur.dioxide    density         pH
## 1         -0.46604672           -0.3790141 0.55809987  1.2882399
## 2          0.87236532            0.6241680 0.02825193 -0.7197081
## 3         -0.08364328            0.2289750 0.13422152 -0.3310730
## 4          0.10755844            0.4113718 0.66406945 -0.9787982
## 5         -0.46604672           -0.3790141 0.55809987  1.2882399
## 6         -0.27484500           -0.1966174 0.55809987  1.2882399
##     sulphates    alcohol    quality
## 1 -0.57902538 -0.9599458 -0.7875763
## 2  0.12891007 -0.5845942 -0.7875763
## 3 -0.04807379 -0.5845942 -0.7875763
## 4 -0.46103614 -0.5845942  0.4507074
## 5 -0.57902538 -0.9599458 -0.7875763
## 6 -0.57902538 -0.9599458 -0.7875763

y_train<-train[, 12]
head(y_train)

## [1] 5 5 5 6 5 5

data_trainIn<-as.data.frame(scale(train[1:1400, ]))
head(data_trainIn)

##   fixed.acidity volatile.acidity citric.acid residual.sugar   chlorides
## 1    -0.6217657        0.9709908   -1.429300    -0.49308621 -0.26869645
## 2    -0.3934452        1.9759178   -1.429300     0.05382623  0.18205794
## 3    -0.3934452        1.3059664   -1.224791    -0.18056482  0.05912493
## 4     1.5472792       -1.3738389    1.433828    -0.49308621 -0.28918528
## 5    -0.6217657        0.9709908   -1.429300    -0.49308621 -0.26869645
## 6    -0.6217657        0.7476737   -1.429300    -0.57121656 -0.28918528
##   free.sulfur.dioxide total.sulfur.dioxide     density         pH
## 1         -0.43642897           -0.3902667  0.49738799  1.3395776
## 2          0.92958952            0.6046744 -0.03340185 -0.6548535
## 3         -0.04613797            0.2127279  0.07275612 -0.2688346
## 4          0.14900753            0.3936263  0.60354596 -0.9121995
## 5         -0.43642897           -0.3902667  0.49738799  1.3395776
## 6         -0.24128347           -0.2093684  0.49738799  1.3395776
##     sulphates    alcohol    quality
## 1 -0.55618727 -0.9250909 -0.7970650
## 2  0.12965728 -0.5557843 -0.7970650
## 3 -0.04180386 -0.5557843 -0.7970650
## 4 -0.44187984 -0.5557843  0.4455753
## 5 -0.55618727 -0.9250909 -0.7970650
## 6 -0.55618727 -0.9250909 -0.7970650

y_trainIn<-train[1:1400, 12]
head(y_trainIn)

## [1] 5 5 5 6 5 5

## select last 199 rows for cross validation
data_trainOut<-as.data.frame(scale(train[1401:1599 , ]))
head(data_trainOut)

##      fixed.acidity volatile.acidity  citric.acid residual.sugar  chlorides
## 1401    0.72341166        0.8393421 -0.003382688    -0.23327968  0.1381207
## 1402    0.72341166        0.8393421 -0.003382688    -0.23327968  0.1381207
## 1403    0.44297220       -1.3440445  1.174638570    -0.28060145 -0.7992829
## 1404    0.06905293       -1.1760917  0.669772317    -0.42256677 -0.4979746
## 1405    0.81689148       -0.2243591  1.006349819     0.00332917  0.2050781
## 1406    0.53645202       -1.4560130  0.501483565    -0.28060145 -0.4644959
##      free.sulfur.dioxide total.sulfur.dioxide     density         pH
## 1401          1.24569608           3.17999948  0.15918760 -0.9800461
## 1402          1.24569608           3.17999948  0.15918760 -0.9800461
## 1403         -1.10689620          -0.62101330  0.21789466  0.4892459
## 1404         -1.36829534          -0.97837347  0.04177347 -1.1347085
## 1405         -0.58409791           0.09370705  1.50945007  0.4119147
## 1406         -0.06129963          -0.29614041 -0.42788304 -0.7480527
##       sulphates    alcohol    quality
## 1401 -1.2351520 -0.7269226 -0.7219613
## 1402 -1.2351520 -0.7269226 -0.7219613
## 1403  1.2508684  1.4405030  0.4853521
## 1404  3.4963062 -0.6185513  2.8999791
## 1405 -0.3530157  0.1400476  0.4853521
## 1406  1.8924221  0.7902753  1.6926656

y_trainOut<-train[1401:1599, 12]
head(y_trainOut)

## [1] 5 5 6 8 6 7

## predic model for last 199 observations
lmmodel <- lm(y_trainIn~., data = data_trainIn)
lmmodel

## 
## Call:
## lm(formula = y_trainIn ~ ., data = data_trainIn)
## 
## Coefficients:
##          (Intercept)         fixed.acidity      volatile.acidity  
##            5.641e+00            -1.733e-15            -1.423e-16  
##          citric.acid        residual.sugar             chlorides  
##            1.382e-15             1.914e-16            -2.769e-16  
##  free.sulfur.dioxide  total.sulfur.dioxide               density  
##            6.395e-16            -1.273e-16             3.607e-17  
##                   pH             sulphates               alcohol  
##           -2.079e-15             1.940e-17             1.796e-15  
##              quality  
##            8.047e-01

yEst<- predict(lmmodel, newdata = data_trainOut)
head(yEst)

##     1401     1402     1403     1404     1405     1406 
## 5.060439 5.060439 6.032010 7.975152 6.032010 7.003581

# Error:
error <- sqrt((sum((y_trainOut-yEst)^2))/length(y_trainOut))
error

## [1] 0.04938217

## predic model for last whole data observations
yEst_all<- predict(lmmodel, newdata = data_scaled)
head(yEst_all)

##        1        2        3        4        5        6 
## 5.007636 5.007636 5.007636 6.004130 5.007636 5.007636

error_all <- sqrt((sum((y_train-yEst_all)^2))/length(y_train))
error_all

## [1] 0.006102212

Question 2: Perform a ridge regression on the wine quality data set from problem 1

using only the first 1400 observations. Compare the results of applying the ridge regression model to the last 199 observations with the results of applying the ordinary least square model to these observations. Compare the coefficients resulting from the ridge regression with the coefficients that were obtained in problem 1. What conclusions can you make from this comparison?

# Method 1: Using ridge
require(rpart)
library(ridge)

## Warning: package 'ridge' was built under R version 3.1.2

train<-read.csv("C:/Users/Andrew/SkyDrive/AGZ_Home/workspace_R/UCSC/MachinLearning/All_data/winequality-red.txt",sep=";",header=TRUE)
data_scaled<-as.data.frame(scale(train[,]))
y_train<-train[, 12]

data_trainIn<-as.data.frame(scale(train[1:1400, ]))
y_trainIn<-train[1:1400, 12]

##select last 199 rows for cross validation
data_trainOut<-as.data.frame(scale(train[1401:1599 , ]))
y_trainOut<-train[1401:1599, 12]


## predic model for last 199 observations
lmmodel <- lm(y_trainIn~., data = data_trainIn)
lmmodel

## 
## Call:
## lm(formula = y_trainIn ~ ., data = data_trainIn)
## 
## Coefficients:
##          (Intercept)         fixed.acidity      volatile.acidity  
##            5.641e+00            -1.733e-15            -1.423e-16  
##          citric.acid        residual.sugar             chlorides  
##            1.382e-15             1.914e-16            -2.769e-16  
##  free.sulfur.dioxide  total.sulfur.dioxide               density  
##            6.395e-16            -1.273e-16             3.607e-17  
##                   pH             sulphates               alcohol  
##           -2.079e-15             1.940e-17             1.796e-15  
##              quality  
##            8.047e-01

yEst<- predict(lmmodel, newdata = data_trainOut)
error <- sqrt(sum((y_trainOut-yEst)^2)/length(y_trainOut))
error  # [1] 0.04938217

## [1] 0.04938217

lmrdgemodel<-linearRidge(y_trainIn~.,data_trainIn)
lmrdgemodel

## 
## Call:
## linearRidge(formula = y_trainIn ~ ., data = data_trainIn)
## 
##          (Intercept)        fixed.acidity     volatile.acidity 
##         5.641429e+00         2.162746e-05        -1.016160e-04 
##          citric.acid       residual.sugar            chlorides 
##        -1.491799e-05         4.473866e-06        -4.697126e-05 
##  free.sulfur.dioxide total.sulfur.dioxide              density 
##         1.843231e-05        -5.993029e-05        -1.498896e-05 
##                   pH            sulphates              alcohol 
##        -2.383937e-05         8.056200e-05         1.584799e-04 
##              quality 
##         8.043137e-01

yEst_ridge<- predict(lmrdgemodel, newdata = data_trainOut)
error_ridge <- sqrt(sum((y_trainOut-yEst_ridge)^2)/length(y_trainOut))
error_ridge

## [1] 0.04951383

# Very close to the Q1.

Question 3: This problem uses the Iris Data Set.

It only involves the Versicolor and Virginica species (rows 51 through 150). Use cross validated ridge regression to classify these two species. Create and plot a ROC curve for this classification method.

set.seed(pi)
attach(iris)
library(MASS)

## Warning: package 'MASS' was built under R version 3.1.2

library(ridge)
library(glmnet)

## Warning: package 'glmnet' was built under R version 3.1.2

## Loading required package: Matrix

## Warning: package 'Matrix' was built under R version 3.1.2

## Loaded glmnet 1.9-8

data(iris)
head(iris)

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

oldpar <- par(no.readonly=TRUE)
head(oldpar)

## $xlog
## [1] FALSE
## 
## $ylog
## [1] FALSE
## 
## $adj
## [1] 0.5
## 
## $ann
## [1] TRUE
## 
## $ask
## [1] FALSE
## 
## $bg
## [1] "transparent"

names(iris)

## [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width" 
## [5] "Species"

dim(iris)

## [1] 150   5

str(iris)

## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

iris_data<-iris[51:150,]

Y1 <- c(rep(-1,100))
Y1

##   [1] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
##  [24] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
##  [47] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
##  [70] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
##  [93] -1 -1 -1 -1 -1 -1 -1 -1

Y2 <- Y1

Y1[1:50] <- 1        #Y1 will be used to separate out Versicolor
Y2[51:100] <- 1      #Y2 is used to separate out Virginica
I <- 1:100
target <- cbind(Y1,Y2)

X <- iris_data[,-5]

##place holder for input and ouput attirbutes
x_trainIn1<-0.0
x_trainOut1<-0.0
x_trainIn2<-0.0
x_trainOut2<-0.0

nxval=5 ### for 5-fold cross validation

ntol=nrow(iris_data) ##number of rows
y_trainIn<-matrix(nrow = (ntol - ntol/nxval), ncol = 2)
y_trainOut<-matrix(nrow = ntol/nxval, ncol = 2)

testErr <- matrix(nrow = nxval, ncol = 2)
lambda <- matrix(nrow = nxval, ncol = 2)
prolambda <- c(rep(0,2)) # prolambda will hold these three lambdas

for(ident in seq(1,2)){  # find the best lambda for each model
    grid=10^seq(10,-2,length=100)
    trainErr <- 0.0
    bestlam <-0.0
    
    for(ixval in seq(from = 1, to = nxval)){
        Iout <- which(I%%nxval== ixval - 1)
        Xin <- X[-Iout,]
        Xout <- X[Iout,]
        Yin <- target[-Iout,ident]  # run with Y1, Y2 
        Yout <- target[Iout,ident]  # run with Y1, Y2
        dataIn <- cbind(Xin,Yin)
        
        if(ident==1){
            y_trainIn[, 1]<-target[-Iout,ident]
            y_trainOut[, 1]<-target[Iout,ident]
            x_trainIn1<-X[-Iout,]
            x_trainOut1<-X[Iout,]
            }
        else if(ident==2) {
            y_trainIn[, 2]<-target[-Iout,ident]
            y_trainOut[, 2] <- target[Iout,ident]
            x_trainIn2<-X[-Iout,]
            x_trainOut2<-X[Iout,]
            }
        
        ## to get best lambda
        cv.out=cv.glmnet(as.matrix(Xin),as.matrix(Yin),alpha=0,lambda=grid)
        bestlam=cv.out$lambda.min
        
        # make fair comparison of error
        ridgeMod =glmnet(as.matrix(Xin), as.matrix(Yin),alpha=0,lambda=bestlam)
        yEst <- predict(ridgeMod, newx = as.matrix(Xout))
    
        testErr[ixval, ident] <-sqrt( sum((Yout-yEst)^2))/length(yEst)
        lambda[ixval, ident]<-bestlam
        
        }##end Y1 and Y2 loop
    
    MintestErr<-apply(testErr, 2, min)
    MintestErr #0.09318130 0.09427988
    index<- which(min(testErr[, ident])==testErr[, ident])
    print(index)  
    prolambda[ident]<- lambda[index, ident]
    ##prolambda1[2]<- lambda[index, 2]
}

## [1] 2
## [1] 2

prolambda ## [1] 0.03053856 0.02310130

## [1] 0.01321941 0.01000000

library(ROCR)

## Warning: package 'ROCR' was built under R version 3.1.2

## Loading required package: gplots

## Warning: package 'gplots' was built under R version 3.1.2

## 
## Attaching package: 'gplots'
## 
## The following object is masked from 'package:stats':
## 
##     lowess

#Create the production model

##appropiate lambda for Y1 (versicolor)
xlambda1 <- prolambda[1]
Xin1 <- x_trainIn1
Yin1 <-y_trainIn[, 1]
dataIn1 <- cbind(Xin1,Yin1)
ridgeMod1 =glmnet(as.matrix(Xin1), as.matrix(Yin1),alpha=0,lambda=xlambda1)

pred_y1 <- predict(ridgeMod1, newx = as.matrix(x_trainOut1))
testErr1 <-sqrt( sum((y_trainOut[, 1] - pred_y1)^2))/length(pred_y1)
testErr1 #[1] 0.1435072

## [1] 0.1439283

##comparing predicted and test values
pred1<-prediction(pred_y1, as.matrix(y_trainOut[, 1]))

###Get TPR and FPR values 
perf1<-performance(pred1,"tpr","fpr")

##performance(pred1,"auc") #
###appropiate lambda for Y2 (virginica)
xlambda2 <- prolambda[2]
Xin2 <- x_trainIn2
Yin2 <- y_trainIn[, 2]
dataIn2 <- cbind(Xin2,Yin2)
ridgeMod2 =glmnet(as.matrix(Xin2), as.matrix(Yin2),alpha=0,lambda=xlambda2)

##
pred_y2 <- predict(ridgeMod2, newx = as.matrix(x_trainOut2))

##error
testErr2 <-sqrt( sum((y_trainOut[, 2] -pred_y2)^2))/length(pred_y2)
testErr2 #[1]  0.1442572

## [1] 0.1442572

pred2<-prediction(pred_y2, as.matrix(y_trainOut[, 2]))
perf2<-performance(pred2,"tpr","fpr")

##performance(pred2,"auc") #
##ROCR gives S4  class variable and it doesn't work like S3 which we used to
## Need to call with '@' instead of '$'. It doesn't work for points
##Need to change data.frame then as.matrix (direct transformation doesn't work)
x.values= data.frame(perf2@x.values)
y.values= data.frame(perf2@y.values)

plot(perf1,col="red", type = "l") # plot ROC curve

points(as.matrix(x.values), as.matrix(y.values),col="blue", type = "p") # plot ROC curve
abline(0,1)
legend("bottomright", c("Versicolor","Virginica"), cex = 0.6, 
       pch = c(20,20), col = c("red", "blue"))# lty = 1:2,

Question 4: See if you can improve on regression-based classification of the iris data that we did in class.

Classify the iris data set with second degree terms added using a ridge regression. (ie supplement the original 4 attributes x1, x2, x3, and x4 by including the 10 second degree terms ( x1x1, x1x2, x1*x3, . ) for a total of 14 attributes.) Use multiclass to classify the data and then compare the results with the results obtained in class.

It is fine to use brute force to add these attributes. For those who are adventurous, investigate the function mutate in the package plyr.

##This solve probvlem 3 on Versicolor and
##Virginica species

attach(iris)

## The following objects are masked from iris (position 8):
## 
##     Petal.Length, Petal.Width, Sepal.Length, Sepal.Width, Species

library(glmnet)
library(plyr)

oldpar <- par(no.readonly=TRUE)
names(iris)

## [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width" 
## [5] "Species"

dim(iris)

## [1] 150   5

str(iris)

## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

##data manipulation using mutate
iris_test<-mutate(iris, X5 =Sepal.Length^2, X6= Sepal.Length* Sepal.Width,
                   X7=Sepal.Length*Petal.Length, X8=Sepal.Length*Petal.Width,
                   X9 =Sepal.Width^2, X10=Sepal.Width*Petal.Length, 
                   X11=Sepal.Width*Petal.Width, X12= Petal.Length^2, 
                   X13=Petal.Length*Petal.Width, X14=Petal.Width^2)
                   
iris_data<-iris[51:150,]                  
iris_trans1<-iris_test[, -5]   
iris_trans2<-iris_test[, 5]
iris_trans<-cbind(iris_trans1, iris_trans2)


##iris_data<-iris[51:150,]
iris_data<-iris_trans[51:150,]   


Y1 <- c(rep(-1,100))
Y2 <- Y1

Y1[1:50] <- 1        #Y1 will be used to separate out Versicolor
Y2[51:100] <- 1      #Y2 is used to separate out Virginica
I <- 1:100
target <- cbind(Y1,Y2)
head(target)

##      Y1 Y2
## [1,]  1 -1
## [2,]  1 -1
## [3,]  1 -1
## [4,]  1 -1
## [5,]  1 -1
## [6,]  1 -1

X <- iris_data[,-15]

##place holder for input and ouput attirbutes
x_trainIn1<-0.0
x_trainOut1<-0.0
x_trainIn2<-0.0
x_trainOut2<-0.0

nxval=5 ### for 5-fold cross validation

ntol=nrow(iris_data) ##number of rows
y_trainIn<-matrix(nrow = (ntol - ntol/nxval), ncol = 2)
y_trainOut<-matrix(nrow = ntol/nxval, ncol = 2)
testErr <- matrix(nrow = nxval, ncol = 2)
lambda <- matrix(nrow = nxval, ncol = 2)
prolambda <- c(rep(0,2)) # prolambda will hold these three lambdas


for(ident in seq(1,2)){  # find the best lambda for each model
    grid=10^seq(10,-2,length=100)
    trainErr <- 0.0
    bestlam <-0.0
    
    for(ixval in seq(from = 1, to = nxval)){
        Iout <- which(I%%nxval== ixval - 1)
        Xin <- X[-Iout,]
        Xout <- X[Iout,]
        Yin <- target[-Iout,ident]  # run with Y1, Y2 
        Yout <- target[Iout,ident]  # run with Y1, Y2 
        dataIn <- cbind(Xin,Yin)
        
        if(ident==1){
            y_trainIn[, 1]<-target[-Iout,ident] 
            y_trainOut[, 1]<-target[Iout,ident]
            x_trainIn1<-X[-Iout,]
            x_trainOut1<-X[Iout,]
            }
        
        else if(ident==2) {
            y_trainIn[, 2]<-target[-Iout,ident]
            y_trainOut[, 2] <- target[Iout,ident]
            x_trainIn2<-X[-Iout,]
            x_trainOut2<-X[Iout,]
            }
    
## to get best lambda
    cv.out=cv.glmnet(as.matrix(Xin),as.matrix(Yin),alpha=0,lambda=grid)
    bestlam=cv.out$lambda.min
    
    # make fair comparison of error
    ridgeMod =glmnet(as.matrix(Xin), as.matrix(Yin),alpha=0,lambda=bestlam)
    yEst <- predict(ridgeMod, newx = as.matrix(Xout))
    
    testErr[ixval, ident] <-sqrt( sum((Yout-yEst)^2))/length(yEst)
    lambda[ixval, ident]<-bestlam

}##end Y1 and Y2 loop
  
  testErr
  MintestErr<-apply(testErr, 2, min)
  MintestErr # 0.09550798 0.09550798
  index<- which(min(testErr[, ident])==testErr[, ident])
  print(index)  
  prolambda[ident]<- lambda[index, ident]
  ##prolambda1[2]<- lambda[index, 2]
  
}

## [1] 2
## [1] 2

prolambda ## [1] 0.03053856 0.02310130

## [1] 0.07054802 0.07054802

##y_trainOut
#library(arulesViz)
library(ROCR)

#Create the production model

##appropiate lambda for Y1 (versicolor)
xlambda1 <- prolambda[1]
Xin1 <- x_trainIn1
Yin1 <-y_trainIn[, 1]
dataIn1 <- cbind(Xin1,Yin1)
ridgeMod1 =glmnet(as.matrix(Xin1), as.matrix(Yin1),alpha=0,lambda=xlambda1)

pred_y1 <- predict(ridgeMod1, newx = as.matrix(x_trainOut1))
testErr1 <-sqrt( sum((y_trainOut[, 1] - pred_y1)^2))/length(pred_y1)
testErr1 #[1] 0.1417557

## [1] 0.1424886

###
##comparing predicted and test values
pred1<-prediction(pred_y1, as.matrix(y_trainOut[, 1]))
###Get TPR and FPR values 
perf1<-performance(pred1,"tpr","fpr")
##performance(pred1,"auc") #

###appropiate lambda for Y2 (virginica)
xlambda2 <- prolambda[2]
Xin2 <- x_trainIn2
Yin2 <- y_trainIn[, 2]
dataIn2 <- cbind(Xin2,Yin2)
ridgeMod2 =glmnet(as.matrix(Xin2), as.matrix(Yin2),alpha=0,lambda=xlambda2)

##
pred_y2 <- predict(ridgeMod2, newx = as.matrix(x_trainOut2))
##error
testErr2 <-sqrt( sum((y_trainOut[, 2] -pred_y2)^2))/length(pred_y2)
testErr2 #[1]  0.1430702

## [1] 0.1424886

pred2<-prediction(pred_y2, as.matrix(y_trainOut[, 2]))
perf2<-performance(pred2,"tpr","fpr")
##performance(pred2,"auc") #
##ROCR gives S4  class variable and it doesn't work like S3 which we used to
## Need to call with '@' instead of '$'. It doesn't work for points
##Need to change data.frame then as.matrix (direct transformation doesn't work)
x.values= data.frame(perf2@x.values)
y.values= data.frame(perf2@y.values)

plot(perf1,col="green", type = "l") # plot ROC curve
points(as.matrix(x.values), as.matrix(y.values),col="blue", type = "p") # plot ROC curve
abline(0,1)
legend("bottomright", c("Versicolor","Virginica"), cex = 0.6, 
       pch = c(20,20), col = c("green", "blue"))# lty = 1:2,

Question 5: This is a multi-class problem. Consider the Glass Identification Data Set from the UC Irvine Data Repository.

The Data is located at the web site: http://archive.ics.uci.edu/ml/datasets/Glass%2BIdentification This problem will only work with building and vehicle window glass (classes 1,2 and 3), so it only uses the first 163 rows of data. (Ignore rows 164 through 214) With this set up this is a three class problem. Use ridge regression to classify this data into the three classes: building windows float processed, building windows non float processed, and vehicle windows float processed.

## Q 5 will try to solve probvlem 3 on Versicolor and  Virginica species
rm(list=ls())
set.seed(pi)

library(glmnet)

oldpar <- par(no.readonly=TRUE)
#oldpar <- par(no.readonly=TRUE)
glass <- read.csv("C:/Users/Andrew/SkyDrive/AGZ_Home/workspace_R/UCSC/MachinLearning/All_data/glass.txt",sep = ",", header = FALSE)
names(glass)

##  [1] "V1"  "V2"  "V3"  "V4"  "V5"  "V6"  "V7"  "V8"  "V9"  "V10" "V11"

str(glass)

## 'data.frame':    214 obs. of  11 variables:
##  $ V1 : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ V2 : num  1.52 1.52 1.52 1.52 1.52 ...
##  $ V3 : num  13.6 13.9 13.5 13.2 13.3 ...
##  $ V4 : num  4.49 3.6 3.55 3.69 3.62 3.61 3.6 3.61 3.58 3.6 ...
##  $ V5 : num  1.1 1.36 1.54 1.29 1.24 1.62 1.14 1.05 1.37 1.36 ...
##  $ V6 : num  71.8 72.7 73 72.6 73.1 ...
##  $ V7 : num  0.06 0.48 0.39 0.57 0.55 0.64 0.58 0.57 0.56 0.57 ...
##  $ V8 : num  8.75 7.83 7.78 8.22 8.07 8.07 8.17 8.24 8.3 8.4 ...
##  $ V9 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V10: num  0 0 0 0 0 0.26 0 0 0 0.11 ...
##  $ V11: int  1 1 1 1 1 1 1 1 1 1 ...

dim(glass)

## [1] 214  11

glass_data<-glass[1:163,]

Y1 <- c(rep(-1,163))
Y1

##   [1] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
##  [24] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
##  [47] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
##  [70] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
##  [93] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
## [116] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
## [139] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
## [162] -1 -1

Y2 <- Y1
Y3 <-Y1

Y1[1:70] <- 1        #Y1 will be used to separate out building windows float processed
Y2[71:146] <- 1      #Y2 is used to separate out building windows non float processed
Y3[147:163] <- 1     #Y3 is used to separate out vehicle windows float processed
I <- 1:163
target <-cbind(Y1, Y2, Y3)


X <- glass_data[,-11]

##place holder for input and ouput attirbutes
x_trainIn<-0.0
x_trainOut<-0.0

nxval=10 ### for 5-fold cross validation

ntol=nrow(glass_data) ##number of rows
y_trainIn<-matrix(nrow = 147, ncol = 3)
y_trainOut<-matrix(nrow = 16, ncol = 3)

testErr <- matrix(nrow = 10, ncol = 3)
lambda <- matrix(nrow = 10, ncol = 3)
y_trainIn1<-0.0
y_trainIn2<-0.0
y_trainIn3<-0.0

y_trainOut1<-0.0
y_trainOut2<-0.0
y_trainOut3<-0.0

x_trainIn1<-0.0
x_trainIn2<-0.0
x_trainIn3<-0.0

x_trainOut1<-0.0
x_trainOut2<-0.0
x_trainOut3<-0.0

prolambda <- c(rep(0,3)) # prolambda will hold these three lambdas

index<-matrix(nrow = 3, ncol=3)
for(ident in seq(1,3)){  # find the best lambda for each model
    grid=10^seq(10,-2,length=50)
    trainErr <- 0.0
    bestlam <-0.0
    
    for(ixval in seq(from = 1, to = 10)){
        Iout <- which(I%%10== ixval - 1)
        ##Iout <- which(I%%10== 10%%ixval)
        Xin <- X[-Iout,]
        Xout <- X[Iout,]
        Yin <- target[-Iout,ident]  # run with Y1, Y2 ,Y3
        Yout <- target[Iout,ident]  # run with Y1, Y2 ,Y3
        dataIn <- cbind(Xin,Yin)
        x_trainIn<-Xin
        x_trainOut<-Xout
            
     ## to get best lambda
        cv.out=cv.glmnet(as.matrix(Xin),as.matrix(Yin),alpha=0,lambda=grid)
        bestlam=cv.out$lambda.min
     
     # make fair comparison of error
     ridgeMod =glmnet(as.matrix(Xin), as.matrix(Yin),alpha=0,lambda=bestlam)
     yEst <- predict(ridgeMod, newx = as.matrix(Xout))
     
     testErr[ixval, ident] <-sqrt( sum((Yout-yEst)^2))/length(yEst)
     lambda[ixval, ident]<-bestlam
     
     if(ident==1){
         ##y_trainIn[, 1]<-target[-Iout, ident] 
         ##y_trainOut[, 1]<-target[Iout, ident]
         y_trainIn1<-target[-Iout, ident]
         y_trainOut1<-target[Iout, ident]
         x_trainIn1<-X[-Iout,]
         x_trainOut1<-X[Iout,]
         }
  else if(ident==2) {
      # y_trainIn[, 2]<-target[-Iout,ident] 
      #y_trainOut[, 2] <- target[Iout,ident]
      y_trainIn2<-target[-Iout, ident]
      y_trainOut2<-target[Iout, ident]
      x_trainIn2<-X[-Iout,]
      x_trainOut2<-X[Iout,]
      }
  
  else if(ident==3){
      y_trainIn3<-target[-Iout, ident]
      y_trainOut3<-target[Iout, ident]
      x_trainIn3<-X[-Iout,]
      x_trainOut3<-X[Iout,]
    }
  }##end Y1 and Y2 loop
  
  testErr
  MintestErr<-apply(testErr, 2, min)
  MintestErr #0.1050712 0.1689575 0.1078599
  index<- min(which(min(testErr[, ident])==testErr[, ident]))
  
  ##index1<- min(which(min(testErr[, 1])==testErr[, 1]))
  index
  
  ##print(index)  
  prolambda[ident]<- lambda[index, ident]
  
  ##prolambda1[2]<- lambda[index, 2]
    
 }


prolambda

## [1] 0.09540955 0.09540955 0.01757511

# prolambda ## [1] 0.03053856 0.02310130
# 

##y_trainOut
library(ROCR)
#Create the production model

##appropiate lambda for Y1 
xlambda1 <- prolambda[1]
Xin1 <- x_trainIn1
Yin1 <-y_trainIn1

#dataIn1 <- cbind(Xin,Yin1)
ridgeMod1 =glmnet(as.matrix(Xin1), as.matrix(Yin1),alpha=0,lambda=xlambda1)

pred_y1 <- predict(ridgeMod1, newx = as.matrix(x_trainOut1))
testErr1 <-sqrt( sum((y_trainOut1 - pred_y1)^2))/length(pred_y1)
testErr1

## [1] 0.1050712

###
##comparing predicted and test values
pred1<-prediction(pred_y1, as.matrix(y_trainOut1))
###Get TPR and FPR values 
perf1<-performance(pred1,"tpr","fpr")
#performance(pred1,"auc") #


##appropiate lambda for Y1 
xlambda2 <- prolambda[2]
Xin2 <-x_trainIn2
Yin2 <-y_trainIn2

#dataIn1 <- cbind(Xin,Yin1)
ridgeMod2 =glmnet(as.matrix(Xin2), as.matrix(Yin2),alpha=0,lambda=xlambda2)

pred_y2 <- predict(ridgeMod2, newx = as.matrix(x_trainOut2))
testErr2 <-sqrt( sum((y_trainOut2 - pred_y2)^2))/length(pred_y2)
testErr2

## [1] 0.1934413

###
##comparing predicted and test values
pred2<-prediction(pred_y2, as.matrix(y_trainOut2))

###Get TPR and FPR values 
perf2<-performance(pred2,"tpr","fpr")

#performance(pred1,"auc") #
# # ##ROCR gives S4  class variable and it doesn't work like S3 which we used to
# # ## Need to call with '@' instead of '$'. It doesn't work for points
# # ##Need to change data.frame then as.matrix (direct transformation doesn't work)
x2.values= data.frame(perf2@x.values)
y2.values= data.frame(perf2@y.values)


##appropiate lambda for Y1 
xlambda3 <- prolambda[3]
Xin3 <-x_trainIn3
Yin3 <-y_trainIn3

#dataIn1 <- cbind(Xin,Yin1)
ridgeMod3 =glmnet(as.matrix(Xin3), as.matrix(Yin3),alpha=0,lambda=xlambda3)

pred_y3 <- predict(ridgeMod3, newx = as.matrix(x_trainOut3))
testErr3 <-sqrt( sum((y_trainOut3 - pred_y3)^2))/length(pred_y3)
testErr3

## [1] 0.1248874

###
##comparing predicted and test values
pred3<-prediction(pred_y3, as.matrix(y_trainOut3))
###Get TPR and FPR values 

perf3<-performance(pred3,"tpr","fpr")
#performance(pred1,"auc") #


x3.values= data.frame(perf3@x.values)
y3.values= data.frame(perf3@y.values)


### ROC plots
plot(perf1,col="red", type = "l") # plot ROC curve
points(as.matrix(x2.values), as.matrix(y2.values),col="blue", type = "p") # plot ROC curve
points(as.matrix(x3.values), as.matrix(y3.values),col="green", type = "p") # plot ROC curve
abline(0,1)

legend("bottomright", c("building processed","building non processed","vehicle processed" ), cex = 0.6, 
       pch = c(20,20), col = c("red", "blue", "green"))# lty = 1:2,