Code
# Packages: plotting (ggplot2, patchwork, directlabels), penalized
# regression (lars, glmnet), and highest-density intervals (HDInterval).
library(ggplot2)
library(patchwork)
library(directlabels)
library(lars)
library(glmnet)
library(HDInterval)
# Runs the full analysis: a simulation study comparing Bayesian empirical
# likelihood (BEL), Bayesian MCMC, and frequentist shrinkage, followed by
# the diabetes-data application. The *_iter arguments are MCMC iteration
# counts; the *_burn arguments are burn-in fractions in (0, 1).
Final <- function(simulation_iter, simulation_burn, realdata_iter, realdata_burn){
# simulates n observations from y = 3*x1 + x2 + 0.2*x3 - 0.5*x4 + eps
simulate <- function(n){
xi1 <- rnorm(n, 2, 0.5)
xi2 <- runif(n, -0.1, 0.1)
xi3 <- rgamma(n, 2, 198)
xi4 <- rnorm(n, 5, 1)
eps <- rnorm(n, 0.1, 2)
yi <- 3*xi1+xi2+0.2*xi3-0.5*xi4+eps
cbind(y=yi, x1=xi1, x2=xi2, x3=xi3, x4=xi4)
}
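# Quick sanity check (illustrative only, not part of the pipeline):
# no-intercept OLS on a large simulated sample should roughly recover
# theta = (3, 1, 0.2, -0.5):
# d <- simulate(1e4)
# round(coef(lm(d[, 1] ~ d[, 2:5] - 1)), 2)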
# wraps the whole analysis so that `Final()` can return all tables at once
res <- function(){
# Metropolis-Hastings sampler for the BEL model; `prior` selects the
# prior family for the shrinkage parameter alpha
mcmcbayes <- function(xvar, yvar, start, n_iters=simulation_iter, burnin=simulation_burn, prior='gamma'){
# used for calculating the log likelihood
sd_residuals <- function(b_est, m_est, xvar, yvar){
y_preds <- xvar*m_est + b_est
resids <- yvar - y_preds
sd(resids)
}
# b = intercept
# m = x coefficient
# compare to: `logLik(lm(yvar~xvar))`
# (simple-regression version, superseded by the empirical-likelihood
# loglik defined below)
loglik <- function(b_est, m_est, xvar, yvar){
y_preds <- xvar*m_est + b_est
likhood <- dnorm(yvar, mean=y_preds, sd=sd_residuals(b_est, m_est, xvar, yvar), log=TRUE)
sum(likhood)
}
### *** ###
# draws a starting value for the shrinkage parameter alpha from the
# chosen prior family; the hyperparameter defaults (r, d, tau, eta1,
# eta2, nu1, nu2) are placeholders to be set for the analysis at hand
draw_alpha <- function(type, r=1, d=1, tau=rep(1, ncol(xvar)), eta1=0, eta2=10, nu1=1, nu2=1){
if(type=='gamma'){
rgamma(1, ncol(xvar)+r, d+0.5*sum(tau^2))
}else if(type=='uniform'){
runif(1, eta1, eta2)
}else if(type=='beta'){
rbeta(1, nu1, nu2)
}
}
# EL multipliers, one per estimating equation, drawn small and held
# fixed; a full BEL implementation would re-solve Lambda at each Theta
Lambda <- runif(ncol(xvar), 0, 0.01)
# empirical log-likelihood at Theta: the estimating equations are
# g_i(Theta) = x_i * (y_i - x_i' Theta), stacked as the rows of G
loglik <- function(Lambda, Theta, xvar, yvar){
G <- xvar * as.vector(yvar - xvar %*% as.matrix(Theta)) # n x p moment matrix
w <- 1 + as.vector(G %*% as.matrix(Lambda))
if (any(w <= 0)) return(-Inf) # outside the EL domain
-sum(log(w))
}
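# Tiny worked example of the moment matrix (illustrative values only):
# X <- rbind(c(1, 2), c(3, 4)); yv <- c(1, 0); th <- c(0.5, -0.5)
# X * as.vector(yv - X %*% th) # row i is x_i times residual_i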
# log prior over (Theta, alpha): a double-exponential (Bayesian-lasso)
# prior with rate alpha on the coefficients (one standard way to let the
# shrinkage parameter enter the posterior), plus the chosen family for
# alpha itself. Hyperparameter defaults are placeholders; also try other
# shapes for the priors (e.g. dbeta()).
log_prior <- function(Theta, alpha, r=1, d=1, eta1=0, eta2=10, nu1=1, nu2=1){
if (alpha <= 0) return(-Inf)
lp_theta <- sum(log(alpha/2) - alpha*abs(Theta))
lp_alpha <- if(prior=='gamma'){
dgamma(alpha, shape=r, rate=d, log=TRUE)
}else if(prior=='uniform'){
dunif(alpha, eta1, eta2, log=TRUE)
}else{
dbeta(alpha, nu1, nu2, log=TRUE)
}
lp_theta + lp_alpha
}
# unnormalized log posterior: empirical log-likelihood plus log prior
posterior <- function(Theta, alpha, xvar, yvar){
loglik(Lambda, Theta, xvar, yvar) + log_prior(Theta, alpha)
}
# random-walk proposal: perturb each coefficient and the shrinkage
# parameter, with abs() reflecting alpha back onto the positive half-line;
# play around with the `sd` tuning values
generate_new_values <- function(Theta, alpha){
c(rnorm(length(Theta), mean=Theta, sd=0.1), abs(rnorm(1, mean=alpha, sd=0.5)))
}
# implements the Metropolis-Hastings MCMC algorithm:
# semi-intelligently tries different values iteratively to estimate parameters
mh_mcmc <- function(xvar, yvar, start, n_iters, burnin){
p <- ncol(xvar)
# initialize the chain: one column per coefficient plus one for alpha
chain <- matrix(NA_real_, nrow=n_iters+1, ncol=p+1)
chain[1, ] <- c(start, draw_alpha(prior))
# for each iteration:
for (x in 1:n_iters){
# guess new values for Theta and alpha
proposal <- generate_new_values(chain[x, 1:p], chain[x, p+1])
# unnormalized log posteriors of new and most recent values
proposal_prob <- posterior(proposal[1:p], proposal[p+1], xvar, yvar)
current_prob <- posterior(chain[x, 1:p], chain[x, p+1], xvar, yvar)
# 'prob' that new vals are more likely than current chain vals
# (NOTE: NOT A TRUE PROBABILITY -- CAN BE ABOVE 1 BUT USUALLY DOESNT MATTER)
prob <- exp(proposal_prob - current_prob)
# get a random probability
random_prob <- runif(1, min=0, max=1)
# if prob for new value is better than random prob, accept proposal
# (the is.na() guard handles the case where both log posteriors are -Inf)
if (!is.na(prob) && random_prob < prob){
chain[x+1, ] <- proposal
} else {
# otherwise retain most recent value
chain[x+1, ] <- chain[x, ]
}
}
colnames(chain) <- c(paste0("theta_", 1:p), "alpha")
# throw out the first `burnin` fraction of iterations before returning
if (!is.null(burnin)){
chain[(floor(burnin*nrow(chain))+1) : nrow(chain), ]
} else {
chain
}
}
# run the sampler and return the (post-burn-in) chain
mh_mcmc(xvar, yvar, start, n_iters, burnin)
}
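# Illustrative diagnostic (hypothetical helper, not used by the pipeline):
# the acceptance rate of a returned chain is the share of successive rows
# that differ, e.g.
# accept_rate <- function(ch) mean(rowSums(ch[-1, , drop=FALSE] != ch[-nrow(ch), , drop=FALSE]) > 0)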
# simulation grid: sample sizes, target shrinkage levels (first six for
# the lasso, last six for ridge), and true coefficients
ns <- c(20, 40, 100)
alphas <- rep(c(0, 0.5, 1, 1.5, 2, 3), 2)
thetas <- c(3, 1, 0.2, -0.5)
n1 <- 20; n2 <- 40; n3 <- 100
df1 <- simulate(n=n1)
df2 <- simulate(n=n2)
df3 <- simulate(n=n3)
x1 <- df1[,2:5]
y1 <- df1[,1]
x2 <- df2[,2:5]
y2 <- df2[,1]
x3 <- df3[,2:5]
y3 <- df3[,1]
# one block of the simulation table: for each target shrinkage level,
# fit the BEL benchmark (lars / cv.glmnet), the MCMC sampler, and the
# frequentist cv.glmnet, recording either the mean squared error or the
# mean bias against the target alpha. lars() fits the whole lasso path
# (it takes no single-penalty argument) and cv.glmnet needs a lambda
# path rather than a scalar, so both are fit on their default paths.
sim_table <- function(xs, ys, squared=TRUE){
err <- function(e) if (squared) mean(e^2) else mean(e)
out <- c()
for (i in 1:length(alphas)) {
alpha <- alphas[i]
jj <- c()
for (j in 1:length(thetas)) {
if(i <= 6){
fit1 <- lars(xs, ys, type="lasso")
m1 <- err(fit1$lambda - alpha)
fit2 <- mcmcbayes(xs, ys, start = thetas, n_iters = simulation_iter, burnin = simulation_burn)
m2 <- err(apply(fit2, 2, mean) - alpha)
fit3 <- cv.glmnet(xs, ys, alpha = 1, nfolds = 5)
m3 <- err(fit3$lambda - alpha)
}else{
fit1 <- cv.glmnet(xs, ys, alpha = 0, nfolds = 5)
m1 <- err(fit1$lambda - alpha)
fit2 <- mcmcbayes(xs, ys, start = thetas, n_iters = simulation_iter, burnin = simulation_burn)
m2 <- err(apply(fit2, 2, mean) - alpha)
fit3 <- cv.glmnet(xs, ys, alpha = 0, nfolds = 5)
m3 <- err(fit3$lambda - alpha)
}
jj <- c(jj, m1, m2, m3)
}
out <- rbind(out, jj)
}
out
}
# mean squared error blocks for n = 20, 40, 100
ii1 <- sim_table(x1, y1)
ii2 <- sim_table(x2, y2)
ii3 <- sim_table(x3, y3)
tab1 <- rbind(ii1,ii2, ii3)
tab11 <- cbind(rep(c(20,40,100),each=12),rep(c(0,0.5,1,1.5,2,3),6), tab1)
methods <- rep(c('Lasso','Ridge'),each=6, times=3)
colnames(tab11) <- c('n','shrinkage',rep(c('BEL','Bayes','Freq'), 4))
table1 <- rbind(c(c(NA,NA), rep(c(3,1,0.2,-0.5), each=3)), tab11)
rownames(table1) <- c('theta', methods)
##############################################
# mean bias blocks for n = 20, 40, 100 (same design, unsquared errors)
ii1 <- sim_table(x1, y1, squared=FALSE)
ii2 <- sim_table(x2, y2, squared=FALSE)
ii3 <- sim_table(x3, y3, squared=FALSE)
tab2 <- rbind(ii1,ii2, ii3)
tab22 <- cbind(rep(c(20,40,100),each=12),rep(c(0,0.5,1,1.5,2,3),6), tab2)
methods <- rep(c('Lasso','Ridge'),each=6, times=3)
colnames(tab22) <- c('n','shrinkage',rep(c('BEL','Bayes','Freq'), 4))
table2 <- rbind(c(c(NA,NA),rep(c(3,1,0.2,-0.5),each=3)),tab22)
rownames(table2) <- c('theta',methods)
#---------------------------------------------
# Diabetes Data
# http://statweb.lsu.edu/faculty/li/teach/exst7142/diabetes.html
loc <- "https://www4.stat.ncsu.edu/~boos/var.select/diabetes.tab.txt"
dat <- read.table(loc,header=T,sep="\t")
# training and testing
set.seed(2)
i.train <- sample(1:nrow(dat), size=round(0.5*nrow(dat)), replace = F)
dat.train <- dat[i.train,]
dat.test <- dat[-i.train,]
x <- as.matrix(dat.train[,1:10])
y <- dat.train[,11]
diabetes_map_lm <- lm(y ~ x)
diabetes_map_ridge <- glmnet(x, y, alpha = 0) # alpha = 0 gives the ridge penalty
diabetes_map_lasso <- glmnet(x, y, alpha = 1) # alpha = 1 gives the lasso penalty
thetas.start <- diabetes_map_lm$coefficients[-1]
x <- as.matrix(dat.test[,1:10])
y <- dat.test[,11]
fit.train1 <- mcmcbayes(x,y, start = thetas.start, n_iters = realdata_iter, burnin = realdata_burn, prior = 'gamma')
fit.train2 <- mcmcbayes(x,y, start = thetas.start, n_iters = realdata_iter, burnin = realdata_burn, prior = 'uniform')
fit.train3 <- mcmcbayes(x,y, start = thetas.start, n_iters = realdata_iter, burnin = realdata_burn, prior = 'beta')
# posterior means of the shrinkage parameter (the last chain column) under each prior
shrink.post.1 <- mean(fit.train1[,11])
shrink.post.2 <- mean(fit.train2[,11])
shrink.post.3 <- mean(fit.train3[,11])
table3 <- cbind(shrink.post.1, shrink.post.2, shrink.post.3)
colnames(table3) <- c('Gamma','Uniform','Beta')
rownames(table3) <- 'alpha_lasso'
#---------------------------------------------
post_params1 <- apply(fit.train1[,-11], 2, mean)
post_params2 <- apply(fit.train2[,-11], 2, mean)
post_params3 <- apply(fit.train3[,-11], 2, mean)
table4 <- rbind(post_params1, post_params2, post_params3)
colnames(table4) <- colnames(dat)[-ncol(dat)]
rownames(table4) <- c('Gamma','Uniform','Beta')
#------------------------------------------------
# 95% highest-posterior-density intervals for the ten coefficients
HPD <- function(x) hdi(x, credMass=0.95)
HPD_g <- t(apply(fit.train1[,-11], 2, HPD))
HPD_u <- t(apply(fit.train2[,-11], 2, HPD))
HPD_b <- t(apply(fit.train3[,-11], 2, HPD))
table5_hpd <- data.frame(Variables=colnames(dat)[-ncol(dat)], HPD_g, HPD_u, HPD_b)
colnames(table5_hpd) <- c('Variables','Gamma_LO','Gamma_UP','Uniform_LO','Uniform_UP','Beta_LO','Beta_UP')
rownames(table5_hpd) <- NULL
#------------------------------------------------
# equal-tailed 95% credible intervals for comparison
CRI <- function(x) quantile(x, probs = c(.025,.975))
CRI_g <- t(apply(fit.train1[,-11], 2, CRI))
CRI_u <- t(apply(fit.train2[,-11], 2, CRI))
CRI_b <- t(apply(fit.train3[,-11], 2, CRI))
table5_cri <- data.frame(Variables=colnames(dat)[-ncol(dat)], CRI_g, CRI_u, CRI_b)
colnames(table5_cri) <- c('Variables','Gamma_LO','Gamma_UP','Uniform_LO','Uniform_UP','Beta_LO','Beta_UP')
rownames(table5_cri) <- NULL
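# HPD and equal-tailed intervals differ on skewed posteriors; a quick
# illustration on synthetic draws (not pipeline data):
# z <- rgamma(1e4, 2, 1); hdi(z, credMass=0.95); quantile(z, c(.025,.975))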
#---------------------------------------------------
# coefficient traces over a grid of lasso penalties: the lars path is fit
# once on the training half and coefficients are read off at each penalty
# with coef(..., mode="lambda")
alpha.vals1 <- c(0, 0.5, 1, 5, 10, 50, 200, 500, 1000, 2000)
BEL <- matrix(NA, nrow=length(alpha.vals1), ncol=10)
BL <- matrix(NA, nrow=length(alpha.vals1), ncol=10)
x <- as.matrix(dat.train[,1:10])
y <- dat.train[,11]
fit.lar <- lars(x, y, type="lar")
x.test <- as.matrix(dat.test[,1:10])
y.test <- dat.test[,11]
for (i in 1:length(alpha.vals1)) {
BEL[i,] <- coef(fit.lar, s=alpha.vals1[i], mode="lambda")
# the sampler takes no penalty argument, so its rows differ only by
# Monte Carlo error
fit.mcmc <- mcmcbayes(x.test, y.test, start = thetas.start, n_iters = realdata_iter, burnin = realdata_burn)
BL[i,] <- apply(fit.mcmc[,-11], 2, mean)
}
colnames(BEL) <- colnames(dat)[-ncol(dat)]
colnames(BL) <- colnames(dat)[-ncol(dat)]
BEL <- apply(BEL, 2, scale)
BL <- apply(BL, 2, scale)
#---------------------------------------------------
# coefficient traces over a grid of ridge penalties; lars() has no ridge
# option, so the ridge coefficients at each penalty come from glmnet(alpha=0)
alpha.vals2 <- c(0, 0.5, 1, 5, 10, 50, 100, 200, 500, 1000, 2000)
BEL2 <- matrix(NA, nrow=length(alpha.vals2), ncol=10)
BL2 <- matrix(NA, nrow=length(alpha.vals2), ncol=10)
for (i in 1:length(alpha.vals2)) {
fit.ridge <- glmnet(x, y, alpha = 0, lambda = alpha.vals2[i])
BEL2[i,] <- as.vector(coef(fit.ridge))[-1] # drop the intercept
fit.mcmc <- mcmcbayes(x.test, y.test, start = thetas.start, n_iters = realdata_iter, burnin = realdata_burn)
BL2[i,] <- apply(fit.mcmc[,-11], 2, mean)
}
colnames(BEL2) <- colnames(dat)[-ncol(dat)]
colnames(BL2) <- colnames(dat)[-ncol(dat)]
BEL2 <- apply(BEL2, 2, scale)
BL2 <- apply(BL2, 2, scale)
# standardized-coefficient trace plot against a penalty grid; one layer
# over long-format data replaces ten hand-coded geom_line/geom_dl pairs
trace_plot <- function(mat, avals, title){
d <- data.frame(step=rep(seq_along(avals), times=ncol(mat)), coef=as.vector(mat), variable=rep(tolower(colnames(mat)), each=length(avals)))
ggplot(d, aes(x=step, y=coef, colour=variable, linetype=variable)) +
geom_line(linewidth=1.1) +
geom_dl(aes(label=variable), method="first.points") +
theme_classic() + theme(legend.position="none") +
scale_x_continuous(breaks=seq_along(avals), labels=avals) +
scale_y_continuous(n.breaks = 10) +
labs(x=expression(alpha), y='Standardized Coefficient', title=title)
}
ggsave('fig2left.jpg', plot=trace_plot(BEL2, alpha.vals2, 'Bayesian empirical likelihood ridge trace plot'), width = 10)
ggsave('fig2right.jpg', plot=trace_plot(BL2, alpha.vals2, 'Bayesian ridge trace plot'), width = 10)
ggsave('fig3left.jpg', plot=trace_plot(BEL, alpha.vals1, 'Bayesian empirical likelihood lasso trace plot'), width = 10)
ggsave('fig3right.jpg', plot=trace_plot(BL, alpha.vals1, 'Bayesian lasso trace plot'), width = 10)
#####################################################
# trace plots and marginal posterior histograms from the last MCMC run
mdata <- as.data.frame(fit.mcmc[,-11])
colnames(mdata) <- colnames(dat)[-ncol(dat)]
# one figure row per variable: trace plot (left) and histogram with a
# density overlay (right); the x index runs over however many
# post-burn-in draws the chain actually kept
vars <- colnames(mdata)
for (k in seq_along(vars)) {
v <- vars[k]
ptrace <- ggplot(mdata, aes(x=seq_len(nrow(mdata)), y=.data[[v]])) +
geom_line() +
labs(x='Index', y=bquote(theta[.(k)]), title=paste('Trace plot of', v))
phist <- ggplot(mdata, aes(x=.data[[v]])) +
geom_histogram(aes(y=after_stat(density)), colour="black", fill="white", bins=30, alpha=.25) +
geom_density() +
labs(x='', y='Density', title=paste('Histogram of', v))
ggsave(paste0('fig4row', k, '.jpg'), plot=ptrace + phist, width = 25)
}
resf <- list(table1=table1, table2=table2, table3=table3, table4=table4, table5_hpd=table5_hpd, table5_credible=table5_cri)
resf
}
# run the full analysis and return the list of tables
res()
}
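# Example call (the iteration counts and burn-in fractions below are
# illustrative placeholders):
# out <- Final(simulation_iter = 5000, simulation_burn = 0.2,
#              realdata_iter = 5000, realdata_burn = 0.2)
# out$table1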