基于R多基因联合诊断

require(tidyverse)
## Loading required package: tidyverse
## -- Attaching packages ------------------------------------------- tidyverse 1.2.1 --
## √ ggplot2 3.2.0     √ purrr   0.3.2
## √ tibble  2.1.3     √ dplyr   0.8.3
## √ tidyr   0.8.3     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.4.0
## Warning: package 'dplyr' was built under R version 3.6.1
## -- Conflicts ---------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
load(file = "F:/Bioinfor_project/Breast/AS_research/AS/result/hubgene.Rdata")
head(data)
## # A tibble: 6 x 22
##   ID    group CCL14  HBA1 CCL16 TUBB3 PAM50 Os_time OS_event RFS_time
##   <chr> <chr> <dbl> <dbl> <dbl> <dbl> <fct>   <int>    <int>    <int>
## 1 TCGA~ TNBC   3.37  0     0     5.94 Basal     967        1       NA
## 2 TCGA~ TNBC   4.97  3.39  0     6.00 Basal     584        0       NA
## 3 TCGA~ TNBC   4.77  0     0     5.04 Basal    2654        0       NA
## 4 TCGA~ TNBC   4.19  3.74  2.84  5.13 Basal     754        1       NA
## 5 TCGA~ TNBC   0     0     0     5.65 Basal    2048        0     2048
## 6 TCGA~ TNBC   5.34  0     4.07  6.04 Basal    1027        0     1027
## # ... with 12 more variables: RFS_event <int>, age <int>, ER <fct>,
## #   PR <fct>, gender <fct>, HER2 <fct>, Margin_status <fct>, Node <int>,
## #   M_stage <fct>, N_stage <fct>, T_stage <fct>, `Pathologic stage` <fct>
mydata<-data %>% 
  filter(group=="TNBC") %>% 
  mutate(Os_time=Os_time/365)

单基因绘制time-dependent ROC曲线

## NNE法
library(survivalROC)
require(tidyverse)
require(plotROC)
## Loading required package: plotROC
## Warning: package 'plotROC' was built under R version 3.6.1
cutoff <-5

## KM法
Marker=survivalROC(Stime=mydata$Os_time, 
    status=mydata$OS_event,      
    marker = mydata$CCL14,
    predict.time =  cutoff, method="KM")
Marker
## $cut.values
##   [1]     -Inf 0.000000 2.764371 2.924143 3.013492 3.368869 3.372704
##   [8] 3.380531 3.383352 3.404173 3.639117 3.681864 3.684274 3.694475
##  [15] 3.826683 3.832320 3.837611 3.841204 3.862057 4.008267 4.122496
##  [22] 4.127543 4.192953 4.206136 4.277356 4.307256 4.350595 4.361507
##  [29] 4.388059 4.391348 4.411623 4.465049 4.476592 4.496105 4.508291
##  [36] 4.508534 4.510887 4.515009 4.546374 4.558978 4.562307 4.570123
##  [43] 4.597573 4.621799 4.630608 4.642392 4.680380 4.681196 4.731325
##  [50] 4.766927 4.782249 4.803477 4.814836 4.815900 4.821098 4.824940
##  [57] 4.839364 4.853309 4.869806 4.888687 4.895099 4.920121 4.924500
##  [64] 4.970021 4.981412 4.985364 5.002359 5.012307 5.031341 5.036695
##  [71] 5.037322 5.040377 5.049134 5.059956 5.062036 5.087198 5.088730
##  [78] 5.093088 5.133692 5.139749 5.160141 5.180651 5.189872 5.193554
##  [85] 5.197480 5.204326 5.236154 5.239599 5.243999 5.252909 5.257710
##  [92] 5.267747 5.294264 5.297290 5.307405 5.317649 5.338654 5.338988
##  [99] 5.351677 5.389814 5.389831 5.395811 5.454811 5.468569 5.488709
## [106] 5.495506 5.502559 5.504139 5.587883 5.599267 5.656008 5.779213
## [113] 5.853471
## 
## $TP
##   [1] 1.00000000 0.99808355 0.99081669 0.98180926 0.97699993 0.96795363
##   [7] 0.90966608 0.90167922 0.89542025 0.90739536 0.90496765 0.89851135
##  [13] 0.89206969 0.90431716 0.89864196 0.88947214 0.88172635 0.87507995
##  [19] 0.87081513 0.86303657 0.87485906 0.86800520 0.81669823 0.82885412
##  [25] 0.82237965 0.81524242 0.82778067 0.84123031 0.85570380 0.84666405
##  [31] 0.86198677 0.85147473 0.84096270 0.85690715 0.87427836 0.86421309
##  [37] 0.85596773 0.84774696 0.83721432 0.82590061 0.81458691 0.77962381
##  [43] 0.77686893 0.76845482 0.76006426 0.75093447 0.73972649 0.72936304
##  [49] 0.71814207 0.73230711 0.69736217 0.68653888 0.67760286 0.67406610
##  [55] 0.63935925 0.63032160 0.63227747 0.64652159 0.63536663 0.62569118
##  [61] 0.61388568 0.62815420 0.61583745 0.60560848 0.55123388 0.55191664
##  [67] 0.55311746 0.54559679 0.53347242 0.54690454 0.53667559 0.47743997
##  [73] 0.46660187 0.47951218 0.49434221 0.50076289 0.42695373 0.41713194
##  [79] 0.37856270 0.39297290 0.38106463 0.38476778 0.37483396 0.36233950
##  [85] 0.35237412 0.33978933 0.34253275 0.32935841 0.27725217 0.28866832
##  [91] 0.27915023 0.29209269 0.23592952 0.22413305 0.21233657 0.20054009
##  [97] 0.19213391 0.15838625 0.15610880 0.14723025 0.11073728 0.10150917
## [103] 0.10766124 0.11627414 0.10335479 0.09043544 0.09689512 0.08074593
## [109] 0.04306450 0.04844756 0.06459675 0.03229837 0.00000000
## 
## $FP
##   [1] 1.00000000 0.95310893 0.94388689 0.93530610 0.92517866 0.91661219
##   [7] 0.92618705 0.91723027 0.90763690 0.89132578 0.88032091 0.87080024
##  [13] 0.86127418 0.84486272 0.83505428 0.82653332 0.81748772 0.80803709
##  [19] 0.79770904 0.78867552 0.77242062 0.76304642 0.77004952 0.75367180
##  [25] 0.74415782 0.73488802 0.71836943 0.70151507 0.68428350 0.67571462
##  [31] 0.65817018 0.65014372 0.64211725 0.62434376 0.60604462 0.59785356
##  [37] 0.58899202 0.58012141 0.57210253 0.56437142 0.55664030 0.55762203
##  [43] 0.54673770 0.53793832 0.52913027 0.52059456 0.51282449 0.50474329
##  [49] 0.49697800 0.47986007 0.48083512 0.47292332 0.46431623 0.45371995
##  [55] 0.45460727 0.44603762 0.43341776 0.41627070 0.40848110 0.40014642
##  [61] 0.39259649 0.37544044 0.36807886 0.35994811 0.36808138 0.35593055
##  [67] 0.34358887 0.33446033 0.32702788 0.31017997 0.30204921 0.31197336
##  [73] 0.30406703 0.28741136 0.27004844 0.25578367 0.27107696 0.26279620
##  [79] 0.26510650 0.24789825 0.24038618 0.22712259 0.21888310 0.21158700
##  [85] 0.20335914 0.19609631 0.18318631 0.17614068 0.18343824 0.16733305
##  [91] 0.15894040 0.14227289 0.15106510 0.14351184 0.13595859 0.12840533
##  [97] 0.11960303 0.12013698 0.10907675 0.10044848 0.10199384 0.09349435
## [103] 0.07932854 0.06425612 0.05711655 0.04997698 0.03569784 0.02974820
## [109] 0.03173142 0.01784892 0.00000000 0.00000000 0.00000000
## 
## $predict.time
## [1] 5
## 
## $Survival
## [1] 0.7307712
## 
## $AUC
## [1] 0.6342068
plot(Marker$FP, Marker$TP, ## x=FP,y=TP
     type="l",col="#BC3C29FF", ##线条设置
     xlim=c(0,1), ylim=c(0,1),   
     xlab=("1-specificity"), ##连接
     ylab="sensitivity",
     main="5 year ROC by CCL14")## \n换行符
abline(0,1,col="gray",lty=2)##线条颜色
## legend可指定位置或"bottomright"等参数
legend(0.6,0.2,paste("5 year AUC =",round(Marker$AUC,3)),
                 x.intersp=1, y.intersp=0.8,
                 lty= 1 ,lwd= 2,col=c("#BC3C29FF","#0072B5FF"),
                 bty = "n",# bty框的类型
                 seg.len=1,cex=0.8)# 

for循环批量绘制

cutoff<-5
head(mydata)
## # A tibble: 6 x 22
##   ID    group CCL14  HBA1 CCL16 TUBB3 PAM50 Os_time OS_event RFS_time
##   <chr> <chr> <dbl> <dbl> <dbl> <dbl> <fct>   <dbl>    <int>    <int>
## 1 TCGA~ TNBC   3.37  0     0     5.94 Basal    2.65        1       NA
## 2 TCGA~ TNBC   4.97  3.39  0     6.00 Basal    1.6         0       NA
## 3 TCGA~ TNBC   4.77  0     0     5.04 Basal    7.27        0       NA
## 4 TCGA~ TNBC   4.19  3.74  2.84  5.13 Basal    2.07        1       NA
## 5 TCGA~ TNBC   0     0     0     5.65 Basal    5.61        0     2048
## 6 TCGA~ TNBC   5.34  0     4.07  6.04 Basal    2.81        0     1027
## # ... with 12 more variables: RFS_event <int>, age <int>, ER <fct>,
## #   PR <fct>, gender <fct>, HER2 <fct>, Margin_status <fct>, Node <int>,
## #   M_stage <fct>, N_stage <fct>, T_stage <fct>, `Pathologic stage` <fct>
for (i in colnames(mydata)[3:6]) {
  Marker[[i]] =survivalROC(Stime=mydata$Os_time, 
    status=mydata$OS_event,      
    marker = mydata[[i]],## 在tibble中获取列值的方法
    predict.time =  cutoff, method="KM")
##
 plot(Marker[[i]][[3]], Marker[[i]][[2]], ## x=FP,y=TP
     type="l",col="#BC3C29FF", ##线条设置
     xlim=c(0,1), ylim=c(0,1),
     lwd=2,
     xlab=("1-specificity"), ##连接
     ylab="sensitivity",
     main= paste("5 year ROC by ",i,sep=""))## \n换行符
abline(0,1,lwd=2,col="gray",lty=2)##线条颜色
## legend可指定位置或"bottomright"等参数
legend(0.6,0.2,paste("5 year AUC  =",round(Marker[[i]][[6]],3)),
                 x.intersp=1, y.intersp=0.8,
                 lty= 1 ,lwd= 2,col=c("#BC3C29FF","#0072B5FF"),
                 bty = "n",# bty框的类型
                 seg.len=1,cex=0.8)# 
}

包装为函数

require(customLayout)
## Loading required package: customLayout
## Warning: package 'customLayout' was built under R version 3.6.1
require(gridExtra)
## Loading required package: gridExtra
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
head(mydata)
## # A tibble: 6 x 22
##   ID    group CCL14  HBA1 CCL16 TUBB3 PAM50 Os_time OS_event RFS_time
##   <chr> <chr> <dbl> <dbl> <dbl> <dbl> <fct>   <dbl>    <int>    <int>
## 1 TCGA~ TNBC   3.37  0     0     5.94 Basal    2.65        1       NA
## 2 TCGA~ TNBC   4.97  3.39  0     6.00 Basal    1.6         0       NA
## 3 TCGA~ TNBC   4.77  0     0     5.04 Basal    7.27        0       NA
## 4 TCGA~ TNBC   4.19  3.74  2.84  5.13 Basal    2.07        1       NA
## 5 TCGA~ TNBC   0     0     0     5.65 Basal    5.61        0     2048
## 6 TCGA~ TNBC   5.34  0     4.07  6.04 Basal    2.81        0     1027
## # ... with 12 more variables: RFS_event <int>, age <int>, ER <fct>,
## #   PR <fct>, gender <fct>, HER2 <fct>, Margin_status <fct>, Node <int>,
## #   M_stage <fct>, N_stage <fct>, T_stage <fct>, `Pathologic stage` <fct>
gene_ROC<-function(i="CCL14",data=mydata,cutoff=5){
    Marker[[i]] =survivalROC(Stime=mydata$Os_time, 
                           status=mydata$OS_event,      
                           marker = mydata[[i]],## 在tibble中获取列值的方法
                            predict.time =  cutoff, method="KM")
## 绘图
 plot(Marker[[i]][[3]], Marker[[i]][[2]], ## x=FP,y=TP
     type="l",col="#BC3C29FF", ##线条设置
     xlim=c(0,1), ylim=c(0,1),
     lwd=2,
     xlab=("1-specificity"), ##连接
     ylab="sensitivity",
     main= paste(cutoff,"year ROC by",i,sep=" "))## \n换行符
abline(0,1,lwd=2,col="gray",lty=2)##线条颜色
## legend可指定位置或"bottomright"等参数
legend(0.6,0.2,paste(cutoff, " year AUC =",round(Marker[[i]][[6]],3)),
                 x.intersp=1, y.intersp=0.8,
                 lty= 1 ,lwd= 2,col=c("#BC3C29FF","#0072B5FF"),
                 bty = "n",# bty框的类型
                 seg.len=1,cex=0.8)# 
}
## 绘制CCL16, 3年AUC
gene_ROC(i="CCL16",cutoff = 6)

## 批量绘制
name<-colnames(mydata)[3:6]

## 组图2*2
par(mfrow=c(2,2))
lapply(name,gene_ROC,cutoff=10)

## [[1]]
## [[1]]$rect
## [[1]]$rect$w
## [1] 0.6167995
## 
## [[1]]$rect$h
## [1] 0.2653816
## 
## [[1]]$rect$left
## [1] 0.6
## 
## [[1]]$rect$top
## [1] 0.2
## 
## 
## [[1]]$text
## [[1]]$text$x
## [1] 0.7306071
## 
## [[1]]$text$y
## [1] 0.06730918
## 
## 
## 
## [[2]]
## [[2]]$rect
## [[2]]$rect$w
## [1] 0.6167995
## 
## [[2]]$rect$h
## [1] 0.2653816
## 
## [[2]]$rect$left
## [1] 0.6
## 
## [[2]]$rect$top
## [1] 0.2
## 
## 
## [[2]]$text
## [[2]]$text$x
## [1] 0.7306071
## 
## [[2]]$text$y
## [1] 0.06730918
## 
## 
## 
## [[3]]
## [[3]]$rect
## [[3]]$rect$w
## [1] 0.6167995
## 
## [[3]]$rect$h
## [1] 0.2653816
## 
## [[3]]$rect$left
## [1] 0.6
## 
## [[3]]$rect$top
## [1] 0.2
## 
## 
## [[3]]$text
## [[3]]$text$x
## [1] 0.7306071
## 
## [[3]]$text$y
## [1] 0.06730918
## 
## 
## 
## [[4]]
## [[4]]$rect
## [[4]]$rect$w
## [1] 0.6167995
## 
## [[4]]$rect$h
## [1] 0.2653816
## 
## [[4]]$rect$left
## [1] 0.6
## 
## [[4]]$rect$top
## [1] 0.2
## 
## 
## [[4]]$text
## [[4]]$text$x
## [1] 0.7306071
## 
## [[4]]$text$y
## [1] 0.06730918

多基因联合诊断

## 首先构建模型
genes<-colnames(mydata)[3:6]
FML <- as.formula(paste0('OS_event~',paste(genes,collapse = '+')))
FML
## OS_event ~ CCL14 + HBA1 + CCL16 + TUBB3
multifit<-glm(FML,family=binomial(), data = mydata)
multifit
## 
## Call:  glm(formula = FML, family = binomial(), data = mydata)
## 
## Coefficients:
## (Intercept)        CCL14         HBA1        CCL16        TUBB3  
##     -7.3255       0.2712       0.1565       0.3302       0.5422  
## 
## Degrees of Freedom: 114 Total (i.e. Null);  110 Residual
## Null Deviance:       99.79 
## Residual Deviance: 92.66     AIC: 102.7
## 计算出一个综合评分
mydata$Comb<-predict.glm(multifit,type = "link")
## 用之前构建好的函数计算
gene_ROC(i="Comb",cutoff = 6)

将多个基因的ROC曲线绘制在一起

先以单个基因绘制

head(mydata)
## # A tibble: 6 x 23
##   ID    group CCL14  HBA1 CCL16 TUBB3 PAM50 Os_time OS_event RFS_time
##   <chr> <chr> <dbl> <dbl> <dbl> <dbl> <fct>   <dbl>    <int>    <int>
## 1 TCGA~ TNBC   3.37  0     0     5.94 Basal    2.65        1       NA
## 2 TCGA~ TNBC   4.97  3.39  0     6.00 Basal    1.6         0       NA
## 3 TCGA~ TNBC   4.77  0     0     5.04 Basal    7.27        0       NA
## 4 TCGA~ TNBC   4.19  3.74  2.84  5.13 Basal    2.07        1       NA
## 5 TCGA~ TNBC   0     0     0     5.65 Basal    5.61        0     2048
## 6 TCGA~ TNBC   5.34  0     4.07  6.04 Basal    2.81        0     1027
## # ... with 13 more variables: RFS_event <int>, age <int>, ER <fct>,
## #   PR <fct>, gender <fct>, HER2 <fct>, Margin_status <fct>, Node <int>,
## #   M_stage <fct>, N_stage <fct>, T_stage <fct>, `Pathologic stage` <fct>,
## #   Comb <dbl>
## 先绘制单个基因
stroc <- survivalROC(Stime = mydata$Os_time, status = mydata$OS_event, 
                     marker = mydata$CCL14, 
                     predict.time = 5, method = "NNE", ## KM法或NNE法
                     span = .25 * 350^(-.2))
## 提取出绘图需要的数据
stroclong<-data.frame(TPF = stroc[["TP"]], FPF = stroc[["FP"]], 
             c = stroc[["cut.values"]], 
             time = rep(stroc[["predict.time"]], length(stroc[["FP"]])))
head(stroclong)
##         TPF       FPF        c time
## 1 1.0000000 1.0000000     -Inf    5
## 2 0.9659247 0.9649750 0.000000    5
## 3 0.9574059 0.9562188 2.764371    5
## 4 0.9488871 0.9474626 2.924143    5
## 5 0.9420720 0.9381226 3.013492    5
## 6 0.9352570 0.9287826 3.368869    5
pROC<-ggplot(stroclong, aes(x = FPF, y = TPF,label=c,color = factor(time))) + 
  geom_roc(labels = FALSE, stat = "identity") + 
  style_roc()+
  ggsci::scale_color_jama()
pROC

calc_auc(pROC)$AUC
## [1] -0.652296
## 修改图例标题并增加AUC
pROC+scale_color_hue(guide=guide_legend(title = "Time"),labels="5 year")+
  annotate("text",x = .75, y = .25, ## 注释text的位置
           label = paste("AUC of 5 years =", round(-calc_auc(pROC)$AUC[1], 2)))+
  ggtitle("5 year ROC by CCL14")
## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.

plotROC绘制感兴趣的某个ROC

gene_roc<-function(i,data=mydata,cutoff=5){
stroc <- survivalROC(Stime = data$Os_time, status = mydata$OS_event, 
                     marker = data[[i]], 
                     predict.time = cutoff, method = "NNE", ## KM法或NNE法
                     span = .25 * 350^(-.2))
## 提取出绘图需要的数据
stroclong<-data.frame(TPF = stroc[["TP"]], FPF = stroc[["FP"]], 
             c = stroc[["cut.values"]], 
             time = rep(stroc[["predict.time"]], length(stroc[["FP"]])))
head(stroclong)
pROC<-ggplot(stroclong, aes(x = FPF, y = TPF,label=c,color = factor(time))) + 
  geom_roc(labels = FALSE, stat = "identity") + 
  style_roc()+
  ggsci::scale_color_jama()
pROC
calc_auc(pROC)$AUC
## 修改图例标题并增加AUC
pROC+  scale_color_hue(guide=guide_legend(title =           "Time"),labels=paste(cutoff,"years"))+
  annotate("text",x = .75, y = .25, ## 注释text的位置
           label = paste("AUC of",i,"=",round(-calc_auc(pROC)$AUC[1], 2)),sep=" ")+
  ggtitle(paste(cutoff,"years ROC by",i,sep = " "))
}
## 输入感兴趣的基因
gene_roc(i="CCL16")

## 同理可批量绘制
p<-lapply(name,gene_roc,cutoff=7)

多个曲线绘制到一起

if(F){
sroc <- lapply(name, function(i="CCL14",t=5,Comb=T){
  stroc <- survivalROC(Stime = mydata$Os_time, status = mydata$OS_event,
                       marker = mydata[[i]], 
                       predict.time = t, method = "NNE", ## KM法或NNE法
                       span = .25 * 350^(-.2))
  data.frame(TPF = stroc[["TP"]], FPF = stroc[["FP"]], 
             c = stroc[["cut.values"]], 
             time = rep(stroc[["predict.time"]],length(stroc[["FP"]])),
             gene = rep(i, length(stroc[["FP"]])))
}
)

## 整合到数据框中
sroclong <- do.call(rbind, sroc)
class(sroclong)
head(sroclong)

if (Comb==T){
FML <- as.formula(paste0('OS_event~',paste(genes,collapse = '+')))
FML
multifit<-glm(FML,family=binomial(), data = mydata)
multifit
## 计算出一个综合评分
mydata$Comb<-predict.glm(multifit,type = "link")
stroc <- survivalROC(Stime = mydata$Os_time, status = mydata$OS_event,
                       marker = mydata[[Comb]], 
                       predict.time = t, method = "NNE", ## KM法或NNE法
                       span = .25 * 350^(-.2))
Comb<-data.frame(TPF = stroc[["TP"]], FPF = stroc[["FP"]], 
             c = stroc[["cut.values"]], 
             time = rep(stroc[["predict.time"]],length(stroc[["FP"]])),
             gene = rep("Comb", length(stroc[["FP"]])))
sroclong<-rbind(Comb,sroclong)} 

## 绘图
pROC<-ggplot(stroclong, aes(x = FPF, y = TPF,label=c,color = gene)) + 
  geom_roc(labels = FALSE, stat = "identity") + 
  style_roc()+
  ggsci::scale_color_jama()+
 scale_color_hue(guide=guide_legend(title =           "Time"),labels=paste(cutoff,"years"))+
  annotate("text", ## 注释text的位置
           label = paste("AUC of",i,"=",round(-calc_auc(pROC)$AUC[1], 2)),sep=" ")+
  annotate("text", ## 注释text的位置
           label = paste("AUC of Comb =",round(-calc_auc(pROC)$AUC[1], 2)),sep=" ")+
  ggtitle(paste(t,"years ROC",sep = " "))
}