Logistic Regression

Cost function and Grad Function

CostFunction and GradFunction compute cost and gradient for regularized logistic regression with multiple variables

[J] = CostFunction(X, y, theta, Lambda)) computes the cost of using theta as the parameter for logisitic regression to fit the data points in X and y. Returns the cost in J

[Grad] = GradFunction return the gradient in grad

CostFunction <- function(X, y, Theta, Lambda){
  Hypothesis <- 1 / (1 + exp(- X %*% Theta)) # probability that y = 1 on input X
  J <- (1/m) * sum(- y * log(Hypothesis) - (1 - y) * log(1 - Hypothesis)) # compute J cost
  reg <- (Lambda / (2 * m)) * sum(Theta[2:length(Theta)] ^ 2)
  J <- J + reg
  return(J)
}


GradFunction <- function(X, y, Theta, Lambda){
  Hypothesis <- 1 / (1 + exp(- X %*% Theta))
  Grad <- ((t(X) %*% (Hypothesis - y)) / m) + ((Lambda / m) * c(0,Theta[2:length(Theta)]))
  return(Grad)
}

Train Logistic Regression

TainLogisticReg trains logistic regression given a dataset (X,y) and a regularization parameter lambda

[theta] = TrainLogisticReg (X, y, lambda) trains logistic regression using the dataset (X, y) and regularization parameter lambda. Returns the parameters theta

TrainLogisticReg <- function(X, y, Lambda){
  X <- cbind(1, X) # add 1 to X for Theta 0
  ifelse(is.vector(X), initial_theta <- c(0,0), initial_theta <- rep(0, ncol(X))) # initialize theta
  
  ifelse(is.vector(X), m <- length(X), m <- nrow(X))
  assign("m", m, .GlobalEnv)
  
  LogisticRec <- optim(par = initial_theta,
                       fn = CostFunction,
                       gr = GradFunction,
                       method = "BFGS",
                       X = X,
                       y = y,
                       Lambda = Lambda)
  
  print(paste("J", LogisticRec$value))
  return(LogisticRec$par)
}

One vs all technique

OneVsAllFunction takes X, y, LAmbda as input and give the prediction and Theta as the output. X should be given as matrix, without a column of 1 for the Theta0 included

Note: in the case of the Auto data, we don’t have a situation where y= 0, where the data is not an auto. However, for the case of spam, no spam we would need a +1

OneVsAllFunction <- function(X, y, Lambda){
  y_cat <- unique(y) # determine hpw many categories we have within y
  Predict_result <- matrix(nrow = nrow(X), ncol = length(y_cat)) # create a matrix to put the Y prediction
  colnames(Predict_result) <- y_cat # put the category names
  
  Theta_result <- matrix(nrow = length(y_cat), ncol = ncol(X) + 1) # +1 for theta 0
  
  row.names(Theta_result) <- y_cat
  
  for(i in 1:length(y_cat)){ #check if we should add the +1
    y_temp <- rep(0, length(y))
    
    for(j in 1:length(y)){ #loop to create the 1 | 0  y_temp
      ifelse(y[j] == y_cat[i], y_temp[j] <- 1, y_temp[j] <- 0)
    } 
    
    Theta_temp <- TrainLogisticReg(X, y_temp, Lambda)
    Predict_result[ ,i] <- 1 / (1 + exp(- cbind(1, X) %*% Theta_temp)) #probability that y = 1 on input X
    Theta_result[i, ] <- Theta_temp 
    
  }
  
  Predict_result <- as.data.frame(Predict_result)
  # check which column has the max result
  Predict_result$Final <- max.col(Predict_result[ ,1:length(y_cat)], ties.method = "first")
  Result <- list("Predict" = Predict_result, "Theta" = Theta_result) # create list of the result
  return(Result)
  
}

Example with iris dataset

ds_iris <- iris[sample(1 : nrow(iris)), ] # shuffle

ds_iris_train <- ds_iris[1:90, ] # create the train dataset with 60% of the data

ds_iris_test <- ds_iris[91:150, ] # create the test dataset with 40% of the data

# Logistic regression

iris_reg <- OneVsAllFunction(X= as.matrix(ds_iris_train[ ,1:4]),
                             y= as.character(ds_iris_train[ ,5]),
                             Lambda = 0)
## [1] "J 2.05224516416223e-05"
## [1] "J 0.0332640504341469"
## [1] "J 0.404728679150913"
# Predict the outcome
predict <- 1 / (1 + exp(- cbind(1, as.matrix(ds_iris_test[, 1:4])) %*% t(iris_reg$Theta)))

predict <- max.col(predict, ties.method = "first")
predict <- gsub(1, "setosa", predict)
predict <- gsub(2, "virginica", predict)
predict <- gsub(3, "versicolor", predict)

table(predict, ds_iris_test$Species) # cross validation
##             
## predict      setosa versicolor virginica
##   setosa         15          0         0
##   versicolor      0         21         3
##   virginica       0          0        21