We want to train a logistic regression model on a dataset consisting of 2 feature variables.The data is in the folder ex2data2.txt in the usual teams file share folder for the class. Here is the plot of the binary Y target variable against the 2 features (crosses are 1s and circles are 0s).
#Read the data
library(data.table)
library(ggplot2)
dataset<-as.matrix(fread("ex2data2.txt"))
colnames(dataset)<-c("x1","x2","y")
d<-as.data.frame(dataset)
d$y<-as.factor(d$y)
ggplot(d, aes(x = x1, y = x2, colour = y)) + geom_point(aes(shape = y, stroke = 2), size = 3) + scale_shape_manual(values=c(1,3))
predicted_prob <- function(theta,X){
return(1/(1+exp(-(X%*%theta))))
}
costfunctionreg = function(theta, X, y, lambda){
#COSTFUNCTIONREG Compute cost and gradient for logistic regression with regularization
# J = COSTFUNCTIONREG(theta, X, y, lambda) computes the cost of using
# theta as the parameter for regularized logistic regression and the
# gradient of the cost w.r.t. to the parameters.
# Initialize some useful values
m = length(y); # number of training examples
# You need to return the following variables correctly
grad = matrix(0,length(theta),1);
h_theta = predicted_prob(theta,X);
J = (1/m) * sum((-y*log(h_theta) - (1-y)*log(1-h_theta))) + (lambda/(2*m)) * t(theta[2:length(theta)])%*%theta[2:length(theta)];
return(J)
}
train<-function(theta0,X,y,lambda){
theta_opt<-optim(theta0, costfunctionreg, gradient, X, y, lambda,
method = c("BFGS"),
lower = -Inf, upper = Inf,
control = list(reltol=10^-12, abstol = 10-12, maxit=10000000), hessian = FALSE)
return(theta_opt)
}
gradient<-function(theta,X,y,lambda){
# Initialize some useful values
m = length(y); # number of training examples
thetaZero = theta;
thetaZero = c(0,thetaZero[2:length(thetaZero)]);
h_theta = predicted_prob(theta,X);
grad = t((1 / m) * t(h_theta - y)%*%X) + (lambda)*thetaZero;
}
#Accuracy = (TP + TN)/(TP + FP +TN +FN)
#Recall = TP/(TP + FN)
#TNR = TN/(TN + FP)
#Precision = TP/(TP + FP)
#NPV = TN/(TN + FP)
classification_metrics<-function(theta,X,y,classification_threshold){
assigned_class<-as.numeric(predicted_prob(theta,X)>classification_threshold)
assigned_class<-factor(as.character(assigned_class),levels=c("0","1"))
y<-as.factor(y)
#Convert to factors
cm = as.matrix(table(y, assigned_class))
accuracy <- sum(diag(cm)) / sum(cm)
recall <- cm[2,2] / (cm[2,1] + cm[2,2])
TNR <- cm[1,1]/(cm[1,1] + cm[1,2])
precision <- cm[2,2] / (cm[1,2] + cm[2,2])
NPV <- cm[1,1]/(cm[1,2] + cm[1,1])
return(list(accuracy=accuracy,recall=recall,TNR=TNR,precision=precision,NPV=NPV))
}
add.poly.features <- function(x, degree){
new.mat <- matrix(1, nrow=nrow(x))
for (i in 1:degree){
for (j in 0:i){
new.mat <- cbind(new.mat, (x[,1]^(i-j) * (x[,2]^j)))
}
}
return(new.mat)
}
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked _by_ '.GlobalEnv':
##
## train
set.seed(100)
train_indexes<-createDataPartition(dataset[,"y"],p=0.7,list=F)
X_train<-dataset[train_indexes,colnames(dataset) != "y"]
y_train<-dataset[train_indexes,"y"]
X_test<-dataset[-train_indexes,colnames(dataset) != "y"]
y_test<-dataset[-train_indexes,"y"]
degree_to_fit <- 1
lambda<-0
X_train_features<-add.poly.features(X_train,degree_to_fit)
X_test_features<-add.poly.features(X_test,degree_to_fit)
theta0<-rep(0,dim(X_train_features)[2])
res<-train(theta0,X_train_features,y_train,lambda = lambda)
print(res$par)
## [1] -0.1120450 -0.1282820 0.2213682
classification_metrics(res$par,X_train_features,y_train,0.5)
## $accuracy
## [1] 0.4939759
##
## $recall
## [1] 0.3
##
## $TNR
## [1] 0.6744186
##
## $precision
## [1] 0.4615385
##
## $NPV
## [1] 0.6744186
classification_metrics(res$par,X_test_features,y_test,0.5)
## $accuracy
## [1] 0.5142857
##
## $recall
## [1] 0.3888889
##
## $TNR
## [1] 0.6470588
##
## $precision
## [1] 0.5384615
##
## $NPV
## [1] 0.6470588
degree_to_fit <- 2
lambda<-0
X_train_features<-add.poly.features(X_train,degree_to_fit)
X_test_features<-add.poly.features(X_test,degree_to_fit)
theta0<-rep(0,dim(X_train_features)[2])
res<-train(theta0,X_train_features,y_train,lambda = lambda)
classification_metrics(res$par,X_train_features,y_train,0.5)
## $accuracy
## [1] 0.7951807
##
## $recall
## [1] 0.775
##
## $TNR
## [1] 0.8139535
##
## $precision
## [1] 0.7948718
##
## $NPV
## [1] 0.8139535
classification_metrics(res$par,X_test_features,y_test,0.5)
## $accuracy
## [1] 0.9428571
##
## $recall
## [1] 1
##
## $TNR
## [1] 0.8823529
##
## $precision
## [1] 0.9
##
## $NPV
## [1] 0.8823529
degree_to_fit <- 3
lambda<-0
X_train_features<-add.poly.features(X_train,degree_to_fit)
X_test_features<-add.poly.features(X_test,degree_to_fit)
theta0<-rep(0,dim(X_train_features)[2])
res<-train(theta0,X_train_features,y_train,lambda = lambda)
classification_metrics(res$par,X_train_features,y_train,0.5)
## $accuracy
## [1] 0.8313253
##
## $recall
## [1] 0.85
##
## $TNR
## [1] 0.8139535
##
## $precision
## [1] 0.8095238
##
## $NPV
## [1] 0.8139535
classification_metrics(res$par,X_test_features,y_test,0.5)
## $accuracy
## [1] 0.9142857
##
## $recall
## [1] 1
##
## $TNR
## [1] 0.8235294
##
## $precision
## [1] 0.8571429
##
## $NPV
## [1] 0.8235294
degree_to_fit <- 4
lambda<-0
X_train_features<-add.poly.features(X_train,degree_to_fit)
X_test_features<-add.poly.features(X_test,degree_to_fit)
theta0<-rep(0,dim(X_train_features)[2])
res<-train(theta0,X_train_features,y_train,lambda = lambda)
classification_metrics(res$par,X_train_features,y_train,0.5)
## $accuracy
## [1] 0.8554217
##
## $recall
## [1] 0.9
##
## $TNR
## [1] 0.8139535
##
## $precision
## [1] 0.8181818
##
## $NPV
## [1] 0.8139535
classification_metrics(res$par,X_test_features,y_test,0.5)
## $accuracy
## [1] 0.9142857
##
## $recall
## [1] 1
##
## $TNR
## [1] 0.8235294
##
## $precision
## [1] 0.8571429
##
## $NPV
## [1] 0.8235294
# Create the data for the chart
A <- c(49,80,83,86)
D <- c("1","2","3","4")
# Plot the bar chart
barplot(A,names.arg=D,xlab="Polynomial Degree",ylab="Accurary",col="blue",
main="Accuracy Chart",border="red")
## Accuracy Chart by Polynomial Degree for the Test Set
# Create the data for the chart
A <- c(51,94,91,91)
D <- c("1","2","3","4")
# Plot the bar chart
barplot(A,names.arg=D,xlab="Polynomial Degree",ylab="Accurary",col="blue",
main="Accuracy Chart",border="red")
Comments on the Results
The accuracy results for the training and the test sets are presented by the above two bar charts.
Our model selection is based on the test accuracy because the training accuracy might be high due to overfitting.
We choose polynomial of degree 2 because it provides the highest accuracy, 0.94 (see Bar Chart for Test Accuracy).
The training accuracy is getting higher as the degree of polynomial increases but this is due to overfitting. That is, the polynomial fits the data of the training set but this model does not fit points not used in the training set and this is the reason why we split the data in training and test sets. The classification metrics should be measured on data not used in the training set.