CostFunction and GradFunction compute the cost and gradient for regularized logistic regression with multiple variables.
[J] = CostFunction(X, y, Theta, Lambda) computes the cost of using Theta as the parameter for logistic regression to fit the data points in X and y. Returns the cost in J.
[Grad] = GradFunction(X, y, Theta, Lambda) returns the gradient in Grad.
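For reference, the two functions below implement the standard regularized cost and gradient, with the intercept left unregularized:

$$J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\Big[-y^{(i)}\log h_\theta(x^{(i)}) - (1-y^{(i)})\log\big(1-h_\theta(x^{(i)})\big)\Big] + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2$$

$$\frac{\partial J}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)\,x_j^{(i)} + \frac{\lambda}{m}\theta_j \qquad (j \ge 1;\ \theta_0 \text{ is not regularized})$$

where $h_\theta(x) = 1/(1+e^{-\theta^{T}x})$ is the sigmoid hypothesis.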
CostFunction <- function(X, y, Theta, Lambda){
  Hypothesis <- 1 / (1 + exp(- X %*% Theta)) # probability that y = 1 on input X
  J <- (1 / m) * sum(- y * log(Hypothesis) - (1 - y) * log(1 - Hypothesis)) # unregularized cost; m is set in the global environment by TrainLogisticReg
  reg <- (Lambda / (2 * m)) * sum(Theta[2:length(Theta)] ^ 2) # regularization term, skipping the intercept Theta[1]
  J <- J + reg
  return(J)
}
GradFunction <- function(X, y, Theta, Lambda){
  Hypothesis <- 1 / (1 + exp(- X %*% Theta)) # same sigmoid hypothesis as in CostFunction
  Grad <- ((t(X) %*% (Hypothesis - y)) / m) + ((Lambda / m) * c(0, Theta[2:length(Theta)])) # the leading 0 leaves the intercept unregularized
  return(Grad)
}
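As a quick sanity check (purely illustrative data, not part of the original analysis), the two functions can be exercised on a tiny made-up design matrix; note that both read m from the global environment, so it has to exist first:

X_toy <- cbind(1, c(1, 2, 3, 4)) # 4 examples: intercept column plus one feature
y_toy <- c(0, 0, 1, 1)
m <- nrow(X_toy) # CostFunction and GradFunction expect m as a global
CostFunction(X_toy, y_toy, Theta = c(0, 0), Lambda = 1) # equals log(2) ~ 0.693 when Theta is all zeros
GradFunction(X_toy, y_toy, Theta = c(0, 0), Lambda = 1)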
TrainLogisticReg trains logistic regression given a dataset (X, y) and a regularization parameter lambda.
[theta] = TrainLogisticReg(X, y, lambda) trains logistic regression using the dataset (X, y) and regularization parameter lambda. Returns the parameters theta.
TrainLogisticReg <- function(X, y, Lambda){
  X <- cbind(1, X) # add a column of 1s to X for Theta 0 (the intercept)
  # initialize theta with zeros and record the number of training examples
  if (is.vector(X)) {
    initial_theta <- c(0, 0)
    m <- length(X)
  } else {
    initial_theta <- rep(0, ncol(X))
    m <- nrow(X)
  }
  assign("m", m, .GlobalEnv) # CostFunction and GradFunction read m from the global environment
  LogisticRec <- optim(par = initial_theta,
                       fn = CostFunction,
                       gr = GradFunction,
                       method = "BFGS",
                       X = X,
                       y = y,
                       Lambda = Lambda)
  print(paste("J", LogisticRec$value))
  return(LogisticRec$par)
}
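As an illustration of TrainLogisticReg on its own (this example and its Lambda value are assumptions, not part of the original post), a single binary classifier separating setosa from the other iris species could be trained like this:

X_bin <- as.matrix(iris[ , 1:4]) # features only; TrainLogisticReg adds the intercept column itself
y_bin <- as.numeric(iris$Species == "setosa") # 1 = setosa, 0 = everything else
theta_setosa <- TrainLogisticReg(X_bin, y_bin, Lambda = 1) # a small Lambda keeps theta bounded on these separable classes
theta_setosa # 5 parameters: intercept plus one per feature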
OneVsAllFunction takes X, y, Lambda as input and gives the predictions and Theta as output. X should be given as a matrix, without a column of 1s for Theta 0 included.
Note: in the case of the Auto data, we don't have a situation where y = 0, i.e. where the observation is not an auto. However, for the spam / no-spam case we would need a +1.
OneVsAllFunction <- function(X, y, Lambda){
  y_cat <- unique(y) # determine how many categories we have within y
  Predict_result <- matrix(nrow = nrow(X), ncol = length(y_cat)) # matrix to hold the y predictions
  colnames(Predict_result) <- y_cat # put the category names
  Theta_result <- matrix(nrow = length(y_cat), ncol = ncol(X) + 1) # +1 for Theta 0
  row.names(Theta_result) <- y_cat
  for(i in 1:length(y_cat)){ # one binary classifier per category (check if we should add the +1)
    y_temp <- rep(0, length(y))
    for(j in 1:length(y)){ # loop to create the 1 / 0 y_temp
      ifelse(y[j] == y_cat[i], y_temp[j] <- 1, y_temp[j] <- 0)
    }
    Theta_temp <- TrainLogisticReg(X, y_temp, Lambda)
    Predict_result[ , i] <- 1 / (1 + exp(- cbind(1, X) %*% Theta_temp)) # probability that y = y_cat[i] on input X
    Theta_result[i, ] <- Theta_temp
  }
  Predict_result <- as.data.frame(Predict_result)
  # check which column has the max probability
  Predict_result$Final <- max.col(Predict_result[ , 1:length(y_cat)], ties.method = "first")
  Result <- list("Predict" = Predict_result, "Theta" = Theta_result) # create a list of the results
  return(Result)
}
ds_iris <- iris[sample(1 : nrow(iris)), ] # shuffle
ds_iris_train <- ds_iris[1:90, ] # create the train dataset with 60% of the data
ds_iris_test <- ds_iris[91:150, ] # create the test dataset with 40% of the data
# Logistic regression
iris_reg <- OneVsAllFunction(X = as.matrix(ds_iris_train[ , 1:4]),
                             y = as.character(ds_iris_train[ , 5]),
                             Lambda = 0)
## [1] "J 2.05224516416223e-05"
## [1] "J 0.0332640504341469"
## [1] "J 0.404728679150913"
# Predict the outcome on the test set
predict <- 1 / (1 + exp(- cbind(1, as.matrix(ds_iris_test[ , 1:4])) %*% t(iris_reg$Theta))) # class probabilities
predict <- max.col(predict, ties.method = "first") # index of the most probable class
# map the column index back to the species name (the order follows unique(y) in the shuffled training set)
predict <- gsub(1, "setosa", predict)
predict <- gsub(2, "virginica", predict)
predict <- gsub(3, "versicolor", predict)
table(predict, ds_iris_test$Species) # confusion matrix on the test set
##
## predict setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 21 3
## virginica 0 0 21
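For the run shown above, 57 of the 60 test observations land on the diagonal of the confusion matrix, i.e. 95% accuracy; the same number can be computed directly:

mean(predict == ds_iris_test$Species) # overall test-set accuracy, 0.95 for the run shown above

Note that the gsub() mapping hard-codes which column index corresponds to which species, and that order depends on unique(y) in the shuffled training set. A more defensive variant (an alternative sketch, not the approach used in this post) reads the labels from the row names of the returned Theta matrix:

prob_test <- 1 / (1 + exp(- cbind(1, as.matrix(ds_iris_test[ , 1:4])) %*% t(iris_reg$Theta)))
predict_alt <- rownames(iris_reg$Theta)[max.col(prob_test, ties.method = "first")] # label of the most probable class
table(predict_alt, ds_iris_test$Species) # should match the table above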