See this GitHub repository for more information: BVSSurv.
################################################################################
# Required packages
################################################################################
rm(list=ls())
#library(devtools)
#install_github("davidrusi/mombf")
library(mombf)
## Loading required package: mvtnorm
## Loading required package: ncvreg
## Loading required package: mgcv
## Loading required package: nlme
## This is mgcv 1.9-0. For overview type 'help("mgcv-package")'.
library(mvtnorm)
library(survival)
library(ggplot2)
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-8
# if (!require("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
# BiocManager::install("Biobase")
library(Biobase)
## Warning: package 'Biobase' was built under R version 4.2.1
## Loading required package: BiocGenerics
## Warning: package 'BiocGenerics' was built under R version 4.2.1
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
library(BhGLM)
library(BVSNLP)
## Bayesian Variable Selection using Non-Local priors for survival and logistic regression data.
## Loading Version 1.1.8
library(eha)
################################################################################
# Data preparation
################################################################################
# flchain data set
data(flchain)
head(flchain)
## age sex sample.yr kappa lambda flc.grp creatinine mgus futime death
## 1 97 F 1997 5.70 4.860 10 1.7 0 85 1
## 2 92 F 2000 0.87 0.683 1 0.9 0 1281 1
## 3 94 F 1997 4.36 3.850 10 1.4 0 69 1
## 4 92 F 1996 2.42 2.220 9 1.0 0 115 1
## 5 93 F 1996 1.32 1.690 6 1.1 0 1039 1
## 6 90 F 1997 2.01 1.860 9 1.0 0 1355 1
## chapter
## 1 Circulatory
## 2 Neoplasms
## 3 Circulatory
## 4 Circulatory
## 5 Circulatory
## 6 Mental
# Variables of interest
colnames(flchain)[c(2,4,5,7,8,9,10)]
## [1] "sex" "kappa" "lambda" "creatinine" "mgus"
## [6] "futime" "death"
# Complete cases of the variables of interest
data.c <- flchain[,c(2,4,5,7,8,9,10)]
data.c <- data.c[complete.cases(data.c),]
dim(data.c)
## [1] 6524 7
head(data.c)
## sex kappa lambda creatinine mgus futime death
## 1 F 5.70 4.860 1.7 0 85 1
## 2 F 0.87 0.683 0.9 0 1281 1
## 3 F 4.36 3.850 1.4 0 69 1
## 4 F 2.42 2.220 1.0 0 115 1
## 5 F 1.32 1.690 1.1 0 1039 1
## 6 F 2.01 1.860 1.0 0 1355 1
# Assigning half-day survival to the zero-survivors
data.c$futime <- ifelse(data.c$futime==0,0.5,data.c$futime)
ys <- Surv(log(data.c$futime/365.25), data.c$death)
data.c$kappa <- scale(data.c$kappa) # scaled kappa
data.c$creatinine <- scale(data.c$creatinine) # scaled creatinine
data.c$lambda <- scale(data.c$lambda) # scaled lambda
data.c$mgus <- scale(data.c$mgus) # scaled mgus
data.c$sex <- as.numeric(data.c$sex)-1 # sex 0-1
data.c$sex <- scale(data.c$sex) # scaled sex
X <- data.c[,-c(6,7)]; # Design matrix
dim(X)
## [1] 6524 5
# Data frame with all the data
df <- data.c
# Visualise the Kaplan-Meier estimator (of survival)
km <- survfit(Surv(futime/365.25, death) ~ 1, data = df)
plot(km, xlab = "Years", ylab = "Survival", main = "Kaplan-Meier estimator",
cex.axis = 1.5, cex.lab = 1.5)
################################################################################
# Bayesian Variable Selection (BVS)
################################################################################
# Regression formula with linear effects
paste(colnames(X), collapse='+')
## [1] "sex+kappa+lambda+creatinine+mgus"
f1y <- as.formula(ys ~sex+kappa+lambda+creatinine+mgus)
#-------------------------------------------------------------------------------
# BVS: Prior 1 - Zellner's prior
#-------------------------------------------------------------------------------
# Priors for the coefficients
priorCoef= zellnerprior(tau=nrow(X))
# Prior on the model space
priorDelta= modelbbprior(1,1)
# Selection step
ms1 <- modelSelection(f1y,data=df,
priorCoef=priorCoef,priorDelta=priorDelta,
enumerate=TRUE, method='Laplace',
center = FALSE, scale = FALSE)
## Enumerating models...
## Zellner prior not implemented, using group Zellner prior instead
## Computing posterior probabilities........... Done.
# Calculating model posterior probabilities
pp1 <- postProb(ms1)
# Top models
head(pp1)
## modelid family pp
## 45 1,3,4 normal 0.703838711
## 41 1,3 normal 0.154613263
## 47 1,3,4,5 normal 0.047558767
## 61 1,2,3,4 normal 0.038608525
## 46 1,3,4,6 normal 0.030595652
## 42 1,3,6 normal 0.005164849
# Marginal inclusion probabilities
mp1 <- ms1$margpp
# Visualising the marginal inclusion probabilities
df1<-data.frame(names=names(mp1),mp = as.vector(round(mp1,digits = 3)))
df1
## names mp
## 1 (Intercept) 1.000
## 2 sex 0.051
## 3 kappa 1.000
## 4 lambda 0.831
## 5 creatinine 0.059
## 6 mgus 0.044
ggplot(df1,aes(x = reorder(names, -mp), y = mp,label=mp))+
geom_bar(stat="identity")+geom_text(size=2.5,hjust=0)+coord_flip()+
xlab("") + ylab("Marginal inclusion probability")
#-------------------------------------------------------------------------------
# BVS: Prior 2 - MOM prior (Laplace)
#-------------------------------------------------------------------------------
# Priors for the coefficients
priorCoefm= momprior(tau = 0.192) # p-mom
# Prior on the model space
priorDelta= modelbbprior(1,1)
# Selection step
ms2 <- modelSelection(f1y,data=df,
priorCoef=priorCoefm,priorDelta=priorDelta,
enumerate=TRUE, method='Laplace',
center = FALSE, scale = FALSE)
## Enumerating models...
## Computing posterior probabilities........... Done.
# Calculating model posterior probabilities
pp2 <- postProb(ms2)
# Top models
head(pp2)
## modelid family pp
## 41 1,3 normal 8.006134e-01
## 45 1,3,4 normal 1.990169e-01
## 42 1,3,6 normal 2.933692e-04
## 46 1,3,4,6 normal 6.838268e-05
## 43 1,3,5 normal 3.651998e-06
## 57 1,2,3 normal 2.312149e-06
# Marginal inclusion probabilities
mp2 <- ms2$margpp
# Visualising the marginal inclusion probabilities
df2<-data.frame(names=names(mp2),mp = as.vector(round(mp2,digits = 3)))
df2
## names mp
## 1 (Intercept) 1.000
## 2 sex 0.000
## 3 kappa 1.000
## 4 lambda 0.199
## 5 creatinine 0.000
## 6 mgus 0.000
ggplot(df2,aes(x = reorder(names, -mp), y = mp,label=mp))+
geom_bar(stat="identity")+geom_text(size=2.5,hjust=0)+coord_flip()+
xlab("") + ylab("Marginal inclusion probability")
#-------------------------------------------------------------------------------
# BVS: Prior 2 - MOM prior (ALA)
#-------------------------------------------------------------------------------
# Selection step
ms3 <- modelSelection(f1y,data=df,
priorCoef=priorCoefm,priorDelta=priorDelta,
enumerate=TRUE, method='ALA',
center = FALSE, scale = FALSE)
## Enumerating models...
## Computing posterior probabilities........... Done.
# Calculating model posterior probabilities
pp3 <- postProb(ms3)
# Top models
head(pp3)
## modelid family pp
## 41 1,3 normal 0.53303230
## 45 1,3,4 normal 0.34788272
## 61 1,2,3,4 normal 0.02301631
## 57 1,2,3 normal 0.01976501
## 47 1,3,4,5 normal 0.01913925
## 46 1,3,4,6 normal 0.01678421
# Marginal inclusion probabilities
mp3 <- ms3$margpp
# Visualising the marginal inclusion probabilities
df3<-data.frame(names=names(mp3),mp = as.vector(round(mp3,digits = 3)))
df3
## names mp
## 1 (Intercept) 1.000
## 2 sex 0.050
## 3 kappa 1.000
## 4 lambda 0.413
## 5 creatinine 0.040
## 6 mgus 0.039
ggplot(df3,aes(x = reorder(names, -mp), y = mp,label=mp))+
geom_bar(stat="identity")+geom_text(size=2.5,hjust=0)+coord_flip()+
xlab("") + ylab("Marginal inclusion probability")
# AFT model using a lognormal baseline distribution
fit.aft <- aftreg(formula = Surv(futime, death) ~ kappa + lambda, data=df,
dist = "lognormal")
summary(fit.aft)
## Covariate W.mean Coef Time-Accn se(Coef) LR p
## kappa -0.133 0.524 1.689 0.049 0.0000
## lambda -0.118 0.160 1.174 0.048 0.0009
##
## Events 1962
## Total time at risk 23796306
## Max. log. likelihood -20238
## LR test statistic 610.70
## Degrees of freedom 2
## Overall p-value 0
#####################################################################
# Cox PiMOM priors (sensitivity analysis)
#####################################################################
# Survival object with times
y2 <- Surv(time = data.c$futime/365.25, data.c$death)
X2 <- X
bvsfit <- bvs(X = data.frame(X2), resp = as.matrix(cbind(data.c$futime/365.2,data.c$death)),
family = "survival", nlptype = "piMOM", mod_prior = "unif",
niter = 100)
# Models with highest probability
head(bvsfit$max_models)
## [[1]]
## [,1]
## [1,] 2
##
## [[2]]
## [,1]
## [1,] 2
## [2,] 4
##
## [[3]]
## [,1]
## [1,] 2
## [2,] 3
## [3,] 4
##
## [[4]]
## [,1]
## [1,] 2
## [2,] 5
##
## [[5]]
## [,1]
## [1,] 2
## [2,] 3
##
## [[6]]
## [,1]
## [1,] 3
# Highest probabilities
head(exp(bvsfit$max_prob_vec - max(bvsfit$max_prob_vec))/sum(exp(bvsfit$max_prob_vec - max(bvsfit$max_prob_vec) )))
## [1] 9.264891e-01 6.983535e-02 3.666899e-03 6.322414e-06 1.393818e-06
## [6] 6.025645e-07
# Highest posterior model
bvsfit$HPM
## [,1]
## [1,] 2
# Estimated coefficients
bvsfit$beta_hat
## [1] 0.2326395
# Number of Visited Models
bvsfit$num_vis_models
## [1] 28
# Visualising the marginal inclusion probabilities
dfi<-data.frame(names=names(mp1)[-1],mp = as.vector(round(bvsfit$inc_probs,digits = 3)))
dfi
## names mp
## 1 sex 0.000
## 2 kappa 1.000
## 3 lambda 0.004
## 4 creatinine 0.074
## 5 mgus 0.000
ggplot(dfi,aes(x = reorder(names, -mp), y = mp,label=mp))+
geom_bar(stat="identity")+geom_text(size=2.5,hjust=0)+coord_flip()+
xlab("") + ylab("Marginal inclusion probability")
#####################################################################
# Spike-and-Slab LASSO (sensitivity analysis)
#####################################################################
# Selection step
spsl <- bmlasso(x = X2, y = y2, family = "cox")
## Warning: from glmnet C++ code (error code -30001); Numerical error at 1th
## lambda value; solutions for larger values of lambda returned
## Warning in getcoef(fit, nvars, nx, vnames): an empty model has been returned;
## probably a convergence issue
# Posterior modes (only option in this package, no MCMC)
spsl$coef
## sex kappa lambda creatinine mgus
## 0.00000000 0.21704395 0.10736486 -0.08341914 -0.03383896
# Posterior modes
plot.bh(coefs = spsl$coef, threshold = spsl$df, gap = 1,
main = "Spike-and-Slab LASSO", lty = 2)
# Posterior Odds-Ratios for the modes
plot.bh(coefs = spsl$coef, threshold = spsl$df, gap = 1,
main = "Spike-and-Slab LASSO", lty = 2, OR = TRUE)
#####################################################################
# Cox-LASSO (sensitivity analysis)
#####################################################################
# Survival object with times
y2 <- Surv(time = df$futime/365.25, event=df$death)
X2 <- as.matrix(X)
# Selection step
cv.fit = try(cv.glmnet(x = X2, y = y2, family="cox",
maxit=10000, nfolds=10, alpha=1), silent=TRUE)
fit = try(glmnet(x = X2, y=y2, family = "cox", maxit=10000, alpha=1), silent=TRUE)
# active variables (lambda.min)
b.coxlasso = as.double(coef(fit, s=cv.fit$lambda.min))
which(b.coxlasso!=0)
## [1] 2
colnames(X)[which(b.coxlasso!=0)]
## [1] "kappa"
# active variables (lambda.1se)
b2.coxlasso = as.double(coef(fit, s=cv.fit$lambda.1se))
which(b2.coxlasso!=0)
## [1] 2
colnames(X)[which(b2.coxlasso!=0)]
## [1] "kappa"
# Fitting a CoxPH model using the survival R package
fit2 <- coxph(Surv(futime, death) ~ kappa + lambda, data=df)
summary(fit2)
## Call:
## coxph(formula = Surv(futime, death) ~ kappa + lambda, data = df)
##
## n= 6524, number of events= 1962
##
## coef exp(coef) se(coef) z Pr(>|z|)
## kappa 0.14348 1.15428 0.01930 7.433 1.06e-13 ***
## lambda 0.11388 1.12062 0.02151 5.295 1.19e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## kappa 1.154 0.8663 1.111 1.199
## lambda 1.121 0.8924 1.074 1.169
##
## Concordance= 0.675 (se = 0.006 )
## Likelihood ratio test= 391.2 on 2 df, p=<2e-16
## Wald test = 937.5 on 2 df, p=<2e-16
## Score (logrank) test = 1114 on 2 df, p=<2e-16