See this GitHub repository for more information: BVSSurv.
################################################################################
# Required packages
################################################################################
rm(list=ls())
#library(devtools)
#install_github("davidrusi/mombf")
library(mombf)
## Loading required package: mvtnorm
## Loading required package: ncvreg
## Loading required package: mgcv
## Loading required package: nlme
## This is mgcv 1.9-0. For overview type 'help("mgcv-package")'.
library(mvtnorm)
library(survival)
library(ggplot2)
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-8
library(penalized)
## Welcome to penalized. For extended examples, see vignette("penalized").
library(BhGLM)
library(BVSNLP)
## Bayesian Variable Selection using Non-Local priors for survival and logistic regression data.
## Loading Version 1.1.8
library(eha)
################################################################################
# Data preparation
################################################################################
# nki70 data set
data(nki70)
head(nki70)
## time event Diam N ER Grade Age TSPYL5
## 125 7.748118 0 <=2cm 1-3 Positive Intermediate 50 -0.18752814
## 127 4.662560 1 <=2cm 1-3 Positive Well diff 42 0.15099047
## 128 8.739220 0 >2cm 1-3 Positive Well diff 50 0.11695046
## 129 7.567420 0 <=2cm 1-3 Positive Intermediate 43 0.10493318
## 130 7.296372 0 <=2cm 1-3 Negative Poorly diff 47 0.30821656
## 132 6.718686 0 <=2cm 1-3 Positive Intermediate 47 -0.09643536
## Contig63649_RC DIAPH3 NUSAP1 AA555029_RC ALDH4A1 QSCN6L1
## 125 -0.15304662 -0.29514052 -0.56582890 -0.21864109 -0.18037880 -0.18778528
## 127 -0.21005843 0.03355057 0.03654657 -0.06474594 -0.04812997 -0.02583513
## 128 -0.25813878 0.07791767 -0.08834376 -0.31973700 0.12525019 -0.08740829
## 129 -0.13687348 -0.01984126 -0.27212360 -0.05067248 0.03215318 -0.18070639
## 130 0.03544526 0.15589646 0.17281797 -0.13445797 0.36857538 0.17349207
## 132 -0.03772432 -0.05882551 0.10173758 -0.19559405 0.46424057 0.21204866
## FGF18 DIAPH3.1 Contig32125_RC BBC3 DIAPH3.2 RP5.860F19.3
## 125 -0.3168283 -0.30118455 -0.11340617 0.01841731 -0.12126035 -0.15464786
## 127 -0.4251015 0.20449784 0.01609294 0.07505098 0.05913143 -0.06155694
## 128 -0.2362195 0.23651714 0.16024944 0.11902553 0.07661457 0.23242917
## 129 -0.1419119 -0.02985957 0.13519350 0.07215202 -0.00671681 -0.09624129
## 130 -0.4445576 0.10981625 -0.03809348 -0.29777674 0.06185603 -0.20635196
## 132 -0.2425910 0.03225047 -0.27078994 0.02057201 0.01687964 0.24147416
## C16orf61 SCUBE2 EXT1 FLT1 GNAZ OXCT1
## 125 -0.27191718 -0.5309360 -0.22933165 0.02928248 -0.16906364 -0.17994325
## 127 -0.04152763 0.2755300 -0.20714263 -0.14841641 -0.02920126 -0.19713944
## 128 -0.18314464 0.2569269 -0.27430646 0.04648250 -0.13084890 -0.21275366
## 129 -0.15029917 0.4460836 0.04636695 -0.20511649 0.11025708 -0.23951208
## 130 0.19203352 -1.0115741 -0.04521220 0.08084737 -0.34581318 0.02778352
## 132 0.12535125 -0.1460709 -0.18753993 0.12485686 -0.01081727 0.12973975
## MMP9 RUNDC1 Contig35251_RC ECT2 GMPS KNTC2
## 125 -0.06216645 0.08771493 -0.03748896 -0.14785939 -0.25709099 -0.12766957
## 127 -0.34564791 -0.18203129 -0.04763378 0.48057208 -0.13851168 0.20359676
## 128 -0.58431851 0.13181509 -0.13576519 0.12846822 -0.40220331 0.27733344
## 129 -0.34800534 0.05287077 -0.05992130 -0.01444154 0.12604074 -0.02530796
## 130 -0.33550292 -0.04808210 0.59748058 0.14647630 0.05912505 -0.09803689
## 132 -0.58003703 0.15926624 -0.32724674 -0.03543524 -0.05018147 -0.02103562
## WISP1 CDC42BPA SERF1A AYTL2 GSTM3 GPR180
## 125 -0.103970962 -0.22440171 -0.13623551 -0.00943934 0.18968065 -0.09527189
## 127 0.059727386 0.17774149 0.08862783 -0.07273406 -0.32643774 -0.11081668
## 128 -0.104370258 -0.30138575 -0.17445309 -0.02608074 -0.06174661 -0.11274356
## 129 -0.019752857 -0.01414852 0.04716683 0.02204883 0.17564504 -0.10823898
## 130 -0.115149133 -0.10022007 0.07047200 0.08180754 -0.13776980 -0.02990369
## 132 -0.003368632 0.28115470 -0.03168261 0.11869773 0.49423377 -0.22673707
## RAB6B ZNF533 RTN4RL1 UCHL5 PECI MTDH
## 125 -0.33731975 0.2407761 -0.03656945 -0.123890903 0.06760955 -0.20642116
## 127 -0.21529011 -0.6575788 -0.04677012 -0.156722709 -0.22302344 -0.18877393
## 128 -0.22379708 -0.2540729 0.20323237 -0.255128549 0.03640988 -0.30561849
## 129 -0.22652800 0.6282427 -0.06701637 -0.056430938 0.08511313 0.11851041
## 130 0.86482037 -0.5190545 0.03135030 0.029619073 -0.08378990 -0.08777178
## 132 0.01575178 -0.5319210 0.16334775 0.004188184 0.10674758 -0.09680589
## Contig40831_RC TGFB3 MELK COL4A2 DTL STK32B
## 125 -0.07429184 -0.06135932 -0.21344635 -0.127032605 -0.18927757 0.01829870
## 127 -0.01618233 -0.05713522 0.09387326 -0.022670332 0.08865691 -0.04945412
## 128 -0.23945992 -0.04796306 -0.08149188 0.124976570 0.11635899 0.08609764
## 129 -0.11257978 0.04509865 -0.11172188 -0.006662163 0.01839956 0.10228288
## 130 0.46141386 -0.16287206 0.16674915 -0.055920131 -0.04819787 -0.12461618
## 132 0.01856611 0.20005740 0.09845308 0.169000275 0.27382112 0.26558747
## DCK FBXO31 GPR126 SLC2A3 PECI.1 ORC6L
## 125 -0.09026170 -0.182893415 -0.2665960 -0.091842878 0.086508268 -0.4034660
## 127 -0.05270824 -0.049127789 -0.2523930 -0.151373696 -0.265191420 -0.1214570
## 128 -0.12256248 -0.004431049 -0.5727796 -0.001815838 0.126026721 -0.2435477
## 129 -0.01966015 -0.077150861 -0.3350556 -0.171287546 0.152326453 -0.2134011
## 130 -0.01326674 0.006056118 0.5583633 -0.041406614 -0.004976858 0.2180532
## 132 -0.01326124 0.097569969 -0.3588504 -0.135215803 0.217921322 0.1857359
## RFC4 CDCA7 LOC643008 MS4A7 MCM6 AP2B1
## 125 -0.07140894 -0.7081020 -0.21647791 0.3891096 -0.24465049 -0.006829476
## 127 -0.06349157 -0.3799276 -0.16775584 -0.2639724 -0.03788761 -0.058419391
## 128 -0.21301104 -0.5395539 -0.46657100 0.0511888 -0.03944086 0.149224299
## 129 0.02711840 -0.7213023 -0.08879391 0.2989184 -0.03860127 0.211246095
## 130 0.23928807 -0.2651383 -0.04699785 -0.3150114 -0.08967836 0.045294491
## 132 0.01566120 -0.3429606 -0.58301663 -0.2052776 0.10927470 0.212807005
## C9orf30 IGFBP5 HRASLS PITRM1 IGFBP5.1 NMU
## 125 -0.15281719 -0.4054488 -0.142758680 -0.10247684 -0.12025904 -0.1406405
## 127 -0.10425668 -0.1912919 0.108491978 -0.12130020 -0.10450723 -0.1519105
## 128 -0.24606486 -0.1701802 -0.101253002 -0.22033592 -0.05874402 -0.4281867
## 129 -0.09965539 0.0333303 -0.008607991 -0.19819464 0.02323837 0.0380348
## 130 0.14046342 -0.5446732 0.283235973 0.24493145 -0.39492836 -0.1254536
## 132 0.12639875 -0.3115924 -0.147548222 0.08855865 -0.18184989 -0.1548281
## PALM2.AKAP2 LGP2 PRC1 Contig20217_RC CENPA EGLN1
## 125 0.14555775 -0.14472450 -0.52684309 -0.292264818 -0.66289668 -0.07776238
## 127 -0.19957775 -0.07023199 0.16582445 -0.001630732 0.07559530 -0.02740977
## 128 -0.00484497 0.13994153 0.03219222 -0.408806797 -0.18530553 -0.03002308
## 129 -0.20718903 0.14901039 -0.28617581 -0.169653442 -0.24485773 -0.20554683
## 130 -0.14454180 -0.29507530 0.05301084 0.100878267 0.15004378 0.09907450
## 132 0.04568845 -0.13650872 -0.12680534 -0.238719246 0.04713565 0.01681536
## NM_004702 ESM1 C20orf46
## 125 -0.193992588 -0.15085953 -0.26754983
## 127 0.277568989 -0.15958054 -0.14436334
## 128 -0.026720628 -0.27082971 0.02245105
## 129 -0.054464200 -0.14249819 -0.21887805
## 130 0.003166855 0.06127841 0.08325228
## 132 -0.048960844 0.18745720 -0.10600440
dim(nki70)
## [1] 144 77
mean(nki70$event)
## [1] 0.3333333
# Survival object
ys= Surv(log(nki70$time), event=nki70$event)
# Design matrix
X <- nki70[,-c(1:2)];
# Diam as binary
X$Diam <- as.numeric(X$Diam) - 1
# N as binary
X$N <- as.numeric(X$N)
# ER as binary
X$ER <- as.numeric(X$ER) - 1
# Grade as numeric
# Ideally, we would do Grade as factor
X$Grade <- as.numeric(X$Grade)
# Scale the remaining entries
X <- apply(X,2,scale)
#X[,-c(1:4)] <- apply(X[,-c(1:4)],2,scale)
df <- data.frame(ys = ys, as.data.frame(X), time = nki70$time, status = nki70$event)
# Visualise the Kaplan-Meier estimator (of survival)
km <- survfit(Surv(time, status) ~ 1, data = df)
plot(km, xlab = "Years", ylab = "Survival", main = "Kaplan-Meier estimator",
cex.axis = 1.5, cex.lab = 1.5)
################################################################################
# Bayesian Variable Selection (BVS)
################################################################################
# Regression formula with linear effects
f1y <- formula(paste('ys ~ ',paste(colnames(X), collapse='+'),sep=''))
#-------------------------------------------------------------------------------
# BVS: Prior 1 - Zellner's prior
#-------------------------------------------------------------------------------
# Priors for the coefficients
priorCoef= zellnerprior(tau=nrow(X))
# Prior on the model space
priorDelta= modelbbprior(1,1)
# Selection step
ms1 <- modelSelection(f1y,data = df,
priorCoef=priorCoef,priorDelta=priorDelta,
enumerate=FALSE, method='Laplace', niter=10000,
center = FALSE, scale = FALSE)
## Zellner prior not implemented, using group Zellner prior instead
## Greedy searching posterior mode... Done.
## Zellner prior not implemented, using group Zellner prior instead
## Running Gibbs sampler........... Done.
# Calculating model posterior probabilities
pp1 <- postProb(ms1)
# Top models
head(pp1)
## modelid family pp
## 1 1,70 normal 0.10181443
## 2 1,31,70 normal 0.08023293
## 3 1,6,70 normal 0.04424246
## 4 1,31,66,70 normal 0.03286287
## 5 1,66,70 normal 0.02784807
## 6 1,6,31,70 normal 0.02561205
# Marginal inclusion probabilities
mp1 <- ms1$margpp
names(mp1) <- c("Intercept",colnames(X))
# Visualising the marginal inclusion probabilities
df1<-data.frame(names=names(mp1),mp = as.vector(round(mp1,digits = 3)))
df1 <- df1[order(mp1, decreasing = TRUE),][1:10,]
ggplot(df1,aes(x = reorder(names, -mp), y = mp,label=mp))+
geom_bar(stat="identity")+geom_text(size=2.5,hjust=0)+coord_flip()+
xlab("") + ylab("Marginal inclusion probability")
# Fitting a CoxPH model using the survival R package
fit0 <- coxph(Surv(time, event) ~ PRC1 + KNTC2, data=nki70)
summary(fit0)
## Call:
## coxph(formula = Surv(time, event) ~ PRC1 + KNTC2, data = nki70)
##
## n= 144, number of events= 48
##
## coef exp(coef) se(coef) z Pr(>|z|)
## PRC1 4.40961 82.23733 0.83047 5.310 1.1e-07 ***
## KNTC2 -3.28477 0.03745 1.17437 -2.797 0.00516 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## PRC1 82.23733 0.01216 16.150043 418.7592
## KNTC2 0.03745 26.70279 0.003748 0.3742
##
## Concordance= 0.788 (se = 0.031 )
## Likelihood ratio test= 29.69 on 2 df, p=4e-07
## Wald test = 29.24 on 2 df, p=4e-07
## Score (logrank) test = 28.65 on 2 df, p=6e-07
# AFT model using a lognormal baseline distribution
fit.aft <- aftreg(formula = Surv(time, event) ~ PRC1 + KNTC2, data=nki70,
dist = "lognormal")
#-------------------------------------------------------------------------------
# BVS: Prior 2 - MOM prior (Laplace approximation)
#-------------------------------------------------------------------------------
# Priors for the coefficients
priorCoefm= momprior(tau = 0.05) # p-mom
# Prior on the model space
priorDelta= modelbbprior(1,1)
# Selection step
ms2 <- modelSelection(f1y,data=df,
priorCoef=priorCoefm,priorDelta=priorDelta,
enumerate=FALSE, method='Laplace', niter=10000,
center = FALSE, scale = FALSE)
## Greedy searching posterior mode... Done.
## Running Gibbs sampler........... Done.
# Calculating model posterior probabilities
pp2 <- postProb(ms2)
# Top models
head(pp2)
## modelid family pp
## 1 1,70 normal 0.32884519
## 2 1,6,70 normal 0.03914489
## 3 1,66,70 normal 0.03900059
## 4 1,63,70 normal 0.03315568
## 5 1,3,70 normal 0.03120171
## 6 1,39,70 normal 0.02704241
# Marginal inclusion probabilities
mp2 <- ms2$margpp
names(mp2) <- c("Intercept",colnames(X))
# Visualising the marginal inclusion probabilities
df2<-data.frame(names=names(mp2),mp = as.vector(round(mp2,digits = 3)))
df2 <- df2[order(mp2, decreasing = TRUE),][1:10,]
ggplot(df2,aes(x = reorder(names, -mp), y = mp,label=mp))+
geom_bar(stat="identity")+geom_text(size=2.5,hjust=0)+coord_flip()+
xlab("") + ylab("Marginal inclusion probability")
#-------------------------------------------------------------------------------
# BVS: Prior 3 - MOM prior (Approximate Laplace approximation)
#-------------------------------------------------------------------------------
# Priors for the coefficients
priorCoefm= momprior(taustd = 0.05) # p-mom
# Prior on the model space
priorDelta= modelbbprior(1,1)
# Selection step
ms3 <- modelSelection(f1y,data=df,
priorCoef=priorCoefm,priorDelta=priorDelta,
enumerate=FALSE, method='ALA', niter=10000,
center = FALSE, scale = FALSE)
## Greedy searching posterior mode... Done.
## Running Gibbs sampler........... Done.
# Calculating model posterior probabilities
pp3 <- postProb(ms3)
# Top models
head(pp3)
## modelid family pp
## 1 1 normal 0.24217125
## 2 1,70 normal 0.04718202
## 3 1,13 normal 0.02119279
## 4 1,39 normal 0.01875962
## 5 1,72 normal 0.01869341
## 6 1,3 normal 0.01799816
# Marginal inclusion probabilities
mp3 <- ms3$margpp
names(mp3) <- c("Intercept",colnames(X))
# Visualising the marginal inclusion probabilities
df3 <-data.frame(names=names(mp3),mp = as.vector(round(mp3,digits = 3)))
df3 <- df3[order(mp3, decreasing = TRUE),][1:10,]
ggplot(df3,aes(x = reorder(names, -mp), y = mp,label=mp))+
geom_bar(stat="identity")+geom_text(size=2.5,hjust=0)+coord_flip()+
xlab("") + ylab("Marginal inclusion probability")
#####################################################################
# Cox PiMOM priors (sensitivity analysis)
#####################################################################
# Survival object with times
y2 <- Surv(time = df$time, df$status)
bvsfit <- bvs(X = data.frame(X), resp = as.matrix(cbind(df$time,df$status)),
family = "survival", nlptype = "piMOM", mod_prior = "unif",
niter = 100)
# Models with highest probability
head(bvsfit$max_models)
## [[1]]
## [,1]
## [1,] 69
##
## [[2]]
## [,1]
## [1,] 30
## [2,] 69
##
## [[3]]
## [,1]
## [1,] 65
## [2,] 69
##
## [[4]]
## [,1]
## [1,] 30
## [2,] 65
## [3,] 69
##
## [[5]]
## [,1]
## [1,] 62
## [2,] 69
##
## [[6]]
## [,1]
## [1,] 30
## [2,] 62
## [3,] 69
# Highest probabilities
head(exp(bvsfit$max_prob_vec - max(bvsfit$max_prob_vec))/sum(exp(bvsfit$max_prob_vec - max(bvsfit$max_prob_vec) )))
## [1] 0.17788901 0.04886200 0.03486345 0.03288978 0.02993577 0.02542419
# Highest posterior model
bvsfit$HPM
## [,1]
## [1,] 69
# Estimated coefficients
bvsfit$beta_hat
## [1] 0.6742049
# Number of Visited Models
bvsfit$num_vis_models
## [1] 2667
# Visualising the marginal inclusion probabilities
dfi<-data.frame(names=names(mp1)[-1],mp = as.vector(round(bvsfit$inc_probs,digits = 3)))
dfi <- dfi[order(dfi$mp, decreasing = TRUE),][1:10,]
ggplot(dfi,aes(x = reorder(names, -mp), y = mp,label=mp))+
geom_bar(stat="identity")+geom_text(size=2.5,hjust=0)+coord_flip()+
xlab("") + ylab("Marginal inclusion probability")
# Fitting a CoxPH model using the survival R package
fit1 <- coxph(Surv(time, event) ~ PRC1 + KNTC2, data=nki70)
summary(fit1)
## Call:
## coxph(formula = Surv(time, event) ~ PRC1 + KNTC2, data = nki70)
##
## n= 144, number of events= 48
##
## coef exp(coef) se(coef) z Pr(>|z|)
## PRC1 4.40961 82.23733 0.83047 5.310 1.1e-07 ***
## KNTC2 -3.28477 0.03745 1.17437 -2.797 0.00516 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## PRC1 82.23733 0.01216 16.150043 418.7592
## KNTC2 0.03745 26.70279 0.003748 0.3742
##
## Concordance= 0.788 (se = 0.031 )
## Likelihood ratio test= 29.69 on 2 df, p=4e-07
## Wald test = 29.24 on 2 df, p=4e-07
## Score (logrank) test = 28.65 on 2 df, p=6e-07
#####################################################################
# Spike-and-Slab LASSO (sensitivity analysis)
#####################################################################
# Selection step
spsl <- bmlasso(x = X, y = y2, family = "cox")
# Posterior modes (only option in this package, no MCMC)
spsl$coef
## Diam N ER Grade Age
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## TSPYL5 Contig63649_RC DIAPH3 NUSAP1 AA555029_RC
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## ALDH4A1 QSCN6L1 FGF18 DIAPH3.1 Contig32125_RC
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## BBC3 DIAPH3.2 RP5.860F19.3 C16orf61 SCUBE2
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## EXT1 FLT1 GNAZ OXCT1 MMP9
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## RUNDC1 Contig35251_RC ECT2 GMPS KNTC2
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## WISP1 CDC42BPA SERF1A AYTL2 GSTM3
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## GPR180 RAB6B ZNF533 RTN4RL1 UCHL5
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## PECI MTDH Contig40831_RC TGFB3 MELK
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## COL4A2 DTL STK32B DCK FBXO31
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## GPR126 SLC2A3 PECI.1 ORC6L RFC4
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## CDCA7 LOC643008 MS4A7 MCM6 AP2B1
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## C9orf30 IGFBP5 HRASLS PITRM1 IGFBP5.1
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## NMU PALM2.AKAP2 LGP2 PRC1 Contig20217_RC
## 0.0000000 0.0000000 0.0000000 0.6572798 0.0000000
## CENPA EGLN1 NM_004702 ESM1 C20orf46
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
# Posterior modes
plot.bh(coefs = spsl$coef, threshold = spsl$df, gap = 1,
main = "Spike-and-Slab LASSO", lty = 2)
# Posterior Odds-Ratios for the modes
plot.bh(coefs = spsl$coef, threshold = spsl$df, gap = 1,
main = "Spike-and-Slab LASSO", lty = 2, OR = TRUE)
#############################################################################################
# Cox-LASSO (sensitivity analysis). Results as in https://doi.org/10.1002/sta4.607
#############################################################################################
# Selection step
cv.fit = try(cv.glmnet(x = X, y = y2, family="cox",
maxit=1e4, nfolds=10, alpha=1), silent=TRUE)
## Warning: from glmnet C++ code (error code -69); Convergence for 69th lambda
## value not reached after maxit=10000 iterations; solutions for larger lambdas
## returned
## Warning: from glmnet C++ code (error code -52); Convergence for 52th lambda
## value not reached after maxit=10000 iterations; solutions for larger lambdas
## returned
## Warning: from glmnet C++ code (error code -55); Convergence for 55th lambda
## value not reached after maxit=10000 iterations; solutions for larger lambdas
## returned
## Warning: from glmnet C++ code (error code -57); Convergence for 57th lambda
## value not reached after maxit=10000 iterations; solutions for larger lambdas
## returned
## Warning: from glmnet C++ code (error code -52); Convergence for 52th lambda
## value not reached after maxit=10000 iterations; solutions for larger lambdas
## returned
## Warning: from glmnet C++ code (error code -54); Convergence for 54th lambda
## value not reached after maxit=10000 iterations; solutions for larger lambdas
## returned
## Warning: from glmnet C++ code (error code -51); Convergence for 51th lambda
## value not reached after maxit=10000 iterations; solutions for larger lambdas
## returned
## Warning: from glmnet C++ code (error code -57); Convergence for 57th lambda
## value not reached after maxit=10000 iterations; solutions for larger lambdas
## returned
## Warning: from glmnet C++ code (error code -58); Convergence for 58th lambda
## value not reached after maxit=10000 iterations; solutions for larger lambdas
## returned
## Warning: from glmnet C++ code (error code -53); Convergence for 53th lambda
## value not reached after maxit=10000 iterations; solutions for larger lambdas
## returned
## Warning: from glmnet C++ code (error code -59); Convergence for 59th lambda
## value not reached after maxit=10000 iterations; solutions for larger lambdas
## returned
fit = try(glmnet(x = X, y=y2, family = "cox", maxit=1e4, alpha=1), silent=TRUE)
## Warning: from glmnet C++ code (error code -69); Convergence for 69th lambda
## value not reached after maxit=10000 iterations; solutions for larger lambdas
## returned
# active variables (lambda.min)
b.coxlasso = as.double(coef(fit, s=cv.fit$lambda.min))
which(b.coxlasso!=0)
## [1] 2 12 38 65 69
colnames(X)[which(b.coxlasso!=0)]
## [1] "N" "QSCN6L1" "ZNF533" "IGFBP5.1" "PRC1"
# active variables (lambda.1se)
b2.coxlasso = as.double(coef(fit, s=cv.fit$lambda.1se))
which(b2.coxlasso!=0)
## integer(0)
colnames(X)[which(b2.coxlasso!=0)]
## character(0)
# Fitting a CoxPH model using the survival R package
fit2 <- coxph(Surv(time, event) ~ N + QSCN6L1 + ZNF533 + IGFBP5.1 + PRC1, data=nki70)
summary(fit2)
## Call:
## coxph(formula = Surv(time, event) ~ N + QSCN6L1 + ZNF533 + IGFBP5.1 +
## PRC1, data = nki70)
##
## n= 144, number of events= 48
##
## coef exp(coef) se(coef) z Pr(>|z|)
## N1-3 -0.6059 0.5456 0.3148 -1.925 0.0543 .
## QSCN6L1 1.1421 3.1333 0.7463 1.530 0.1260
## ZNF533 -0.4974 0.6081 0.3940 -1.263 0.2068
## IGFBP5.1 0.9248 2.5214 0.4451 2.078 0.0377 *
## PRC1 1.9860 7.2863 0.7944 2.500 0.0124 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## N1-3 0.5456 1.8329 0.2944 1.011
## QSCN6L1 3.1333 0.3192 0.7256 13.529
## ZNF533 0.6081 1.6445 0.2809 1.316
## IGFBP5.1 2.5214 0.3966 1.0538 6.033
## PRC1 7.2863 0.1372 1.5358 34.569
##
## Concordance= 0.793 (se = 0.029 )
## Likelihood ratio test= 38.36 on 5 df, p=3e-07
## Wald test = 37 on 5 df, p=6e-07
## Score (logrank) test = 39.92 on 5 df, p=2e-07