#library(MASS)
library(glmnet)
## Loading required package: Matrix
## Loading required package: foreach
## Warning: package 'foreach' was built under R version 3.4.3
## Loaded glmnet 2.0-13
n: the number of samples (“rows” of data) required. mu: a vector giving the means of the variables. Sigma: positive-definite symmetric matrix specifying the covariance matrix of the variables.
X: n x p N(0, 1) p: number of factors l: number of tasks y: n x l response epsilon : n x l noise w : p x l. wij is the coef of factor i for task j
set.seed(65587)
# (n=12, p=16) q in [1,3] 3, l = 5
n <- 12
p <- 16
l <- 5
q <- 3
X <- read.csv("design1226.csv", skip = 7, header = FALSE)
X <- apply(X, 1, gsub, pattern="\\+", replacement= "1", perl=TRUE)
X <- apply(X, 1, gsub, pattern="\\-", replacement= "-1", perl=TRUE)
X <- matrix(as.numeric(X), n, p)
lasso_correct <<- numeric()
mt_correct <<- numeric()
lasso_wRSS <<- numeric()
mt_wRSS <<- numeric()
getXy <- function(){
# Have 5 different responses
# There are 3 same true var for different tasks, the true vars are randomly chosen
# for every task, we need a vector to set the true vars
w <<- matrix(0, p, l)
for (j in 1:l) {
true_var <- c(1, 2, 3) #sample(p, q)
w[true_var, j] <<- round(rnorm(q, mean = 0, sd = 1), 3)
}
y <<- X %*% w
# epsilon is the gaussian error. dimension: n * l.
epsilon <- matrix(rnorm(n * l), n, l)
y <<- y + epsilon
}
compare <- function() {
mt_fit = glmnet(X, y, family = "mgaussian")
#plot(mt_fit, type.coef = "2norm", label = TRUE)
## Use Cross Validation to select $\lambda$, and compare.
cv.mt_fit = cv.glmnet(X, y, family = "mgaussian", intercept = FALSE)
cv.fit1 = cv.glmnet(X, y[, 1], intercept = FALSE)
cv.fit2 = cv.glmnet(X, y[, 2], intercept = FALSE)
cv.fit3 = cv.glmnet(X, y[, 3], intercept = FALSE)
cv.fit4 = cv.glmnet(X, y[, 4], intercept = FALSE)
cv.fit5 = cv.glmnet(X, y[, 5], intercept = FALSE)
# Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per fold
# If grouped=FALSE, an error matrix is built up at the observation level from the predictions from the nfold fits, and then summarized
#plot(cv.mt_fit)
#plot(cv.fit1)
#plot(cv.fit2)
#plot(cv.fit3)
# multi-task
mt_beta = cbind(
coef(cv.mt_fit)$y1,
coef(cv.mt_fit)$y2,
coef(cv.mt_fit)$y3,
coef(cv.mt_fit)$y4,
coef(cv.mt_fit)$y5
)
mt_beta = mt_beta[-1,]
#print(mt_beta)
#print(sum((mt_beta - w) ^ 2))
# lasso for each response
lasso_beta = cbind(coef(cv.fit1),
coef(cv.fit2),
coef(cv.fit3),
coef(cv.fit4),
coef(cv.fit5))
lasso_beta = lasso_beta[-1,]
#print(lasso_beta)
#print(sum((lasso_beta - w) ^ 2))
# get whether correct selection
mt_correct <<- c(mt_correct, sum(as.logical(w) == as.logical(mt_beta)) / (p * l))
lasso_correct <<- c(lasso_correct, sum(as.logical(w) == as.logical(lasso_beta)) / (p * l))
# get RSS fotr w
lasso_wRSS <<- c(lasso_wRSS, sum((w - lasso_beta) ^ 2))
mt_wRSS <<- c(mt_wRSS, sum((w - mt_beta) ^ 2))
}
Main function
for(ii in 1:30){
getXy()
compare()
}
mean(mt_wRSS)
## [1] 4.296501
mean(lasso_wRSS)
## [1] 6.029978
mean(mt_correct)
## [1] 0.92875
mean(lasso_correct)
## [1] 0.8495833