Larger model no correlation.

#library(MASS)
library(glmnet)

## Loading required package: Matrix

## Loading required package: foreach

## Warning: package 'foreach' was built under R version 3.4.3

## Loaded glmnet 2.0-13

add design

n: the number of samples (“rows” of data) required. mu: a vector giving the means of the variables. Sigma: positive-definite symmetric matrix specifying the covariance matrix of the variables.

X: n x p N(0, 1) p: number of factors l: number of tasks y: n x l response epsilon : n x l noise w : p x l. wij is the coef of factor i for task j

Initialization

set.seed(65587)
# (n=12, p=16) q in [1,3] 3, l = 5
n <- 12
p <- 16
l <- 5
q <- 3
X <- read.csv("design1226.csv", skip = 7, header = FALSE)
X <- apply(X, 1, gsub, pattern="\\+", replacement= "1", perl=TRUE)
X <- apply(X, 1, gsub, pattern="\\-", replacement= "-1", perl=TRUE)
X <- matrix(as.numeric(X), n, p)

lasso_correct <<- numeric()
mt_correct <<- numeric()
lasso_wRSS <<- numeric()
mt_wRSS <<- numeric()

Define the global variable X, w, y. Replicate 10000 times.

getXy <- function(){
    # Have 5 different responses
    # There are 3 same true var for different tasks, the true vars are randomly chosen
    
    # for every task, we need a vector to set the true vars
    w <<- matrix(0, p, l)
    for (j in 1:l) {
        true_var <- c(1, 2, 3)  #sample(p, q)
        w[true_var, j] <<- round(rnorm(q, mean = 0, sd = 1), 3)
    }
    y <<- X %*% w
    # epsilon is the gaussian error. dimension: n * l.
    epsilon <- matrix(rnorm(n * l), n, l)
    y <<- y + epsilon
}

Function for multi-task LASSO experiment

compare <- function() {
    mt_fit = glmnet(X, y, family = "mgaussian")
    #plot(mt_fit, type.coef = "2norm", label = TRUE)
    
    ## Use Cross Validation to select $\lambda$, and compare.
    cv.mt_fit = cv.glmnet(X, y, family = "mgaussian", intercept = FALSE)
    cv.fit1 = cv.glmnet(X, y[, 1], intercept = FALSE)
    cv.fit2 = cv.glmnet(X, y[, 2], intercept = FALSE)
    cv.fit3 = cv.glmnet(X, y[, 3], intercept = FALSE)
    cv.fit4 = cv.glmnet(X, y[, 4], intercept = FALSE)
    cv.fit5 = cv.glmnet(X, y[, 5], intercept = FALSE)
    # Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
    # If grouped=FALSE, an error matrix is built up at the observation level from the predictions from the nfold fits, and then summarized
    
    #plot(cv.mt_fit)
    #plot(cv.fit1)
    #plot(cv.fit2)
    #plot(cv.fit3)
    
    # multi-task
    mt_beta = cbind(
    coef(cv.mt_fit)$y1,
    coef(cv.mt_fit)$y2,
    coef(cv.mt_fit)$y3,
    coef(cv.mt_fit)$y4,
    coef(cv.mt_fit)$y5
    )
    mt_beta = mt_beta[-1,]
    #print(mt_beta)
    #print(sum((mt_beta - w) ^ 2))
    
    # lasso for each response
    lasso_beta = cbind(coef(cv.fit1),
    coef(cv.fit2),
    coef(cv.fit3),
    coef(cv.fit4),
    coef(cv.fit5))
    lasso_beta = lasso_beta[-1,]
    #print(lasso_beta)
    #print(sum((lasso_beta - w) ^ 2))
    
    # get whether correct selection
    mt_correct <<- c(mt_correct, sum(as.logical(w) == as.logical(mt_beta)) / (p * l))
    lasso_correct <<- c(lasso_correct, sum(as.logical(w) == as.logical(lasso_beta)) / (p * l))
    
    # get RSS fotr w
    lasso_wRSS <<- c(lasso_wRSS, sum((w - lasso_beta) ^ 2))
    mt_wRSS <<- c(mt_wRSS, sum((w - mt_beta) ^ 2))
}

Main function

for(ii in 1:30){
    getXy()
    compare()
}

mean(mt_wRSS)

## [1] 4.296501

mean(lasso_wRSS)

## [1] 6.029978

mean(mt_correct)

## [1] 0.92875

mean(lasso_correct)

## [1] 0.8495833

SSD multi-task lasso vs lasso 1216

Huiting Su

April 4, 2018

Larger model no correlation.

add design

Initialization

Define the global variable X, w, y. Replicate 10000 times.

Function for multi-task LASSO experiment