基于R多基因联合诊断
- 单基因作为单因素诊断
- 多基因作为多因素联合诊断
- 多基因联合其它因素的多因素联合诊断
require(tidyverse)
## Loading required package: tidyverse
## -- Attaching packages ------------------------------------------- tidyverse 1.2.1 --
## √ ggplot2 3.2.0 √ purrr 0.3.2
## √ tibble 2.1.3 √ dplyr 0.8.3
## √ tidyr 0.8.3 √ stringr 1.4.0
## √ readr 1.3.1 √ forcats 0.4.0
## Warning: package 'dplyr' was built under R version 3.6.1
## -- Conflicts ---------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
load(file = "F:/Bioinfor_project/Breast/AS_research/AS/result/hubgene.Rdata")
head(data)
## # A tibble: 6 x 22
## ID group CCL14 HBA1 CCL16 TUBB3 PAM50 Os_time OS_event RFS_time
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <fct> <int> <int> <int>
## 1 TCGA~ TNBC 3.37 0 0 5.94 Basal 967 1 NA
## 2 TCGA~ TNBC 4.97 3.39 0 6.00 Basal 584 0 NA
## 3 TCGA~ TNBC 4.77 0 0 5.04 Basal 2654 0 NA
## 4 TCGA~ TNBC 4.19 3.74 2.84 5.13 Basal 754 1 NA
## 5 TCGA~ TNBC 0 0 0 5.65 Basal 2048 0 2048
## 6 TCGA~ TNBC 5.34 0 4.07 6.04 Basal 1027 0 1027
## # ... with 12 more variables: RFS_event <int>, age <int>, ER <fct>,
## # PR <fct>, gender <fct>, HER2 <fct>, Margin_status <fct>, Node <int>,
## # M_stage <fct>, N_stage <fct>, T_stage <fct>, `Pathologic stage` <fct>
mydata<-data %>%
filter(group=="TNBC") %>%
mutate(Os_time=Os_time/365)
单基因绘制time-dependent ROC曲线
## NNE法
library(survivalROC)
require(tidyverse)
require(plotROC)
## Loading required package: plotROC
## Warning: package 'plotROC' was built under R version 3.6.1
cutoff <-5
## KM法
Marker=survivalROC(Stime=mydata$Os_time,
status=mydata$OS_event,
marker = mydata$CCL14,
predict.time = cutoff, method="KM")
Marker
## $cut.values
## [1] -Inf 0.000000 2.764371 2.924143 3.013492 3.368869 3.372704
## [8] 3.380531 3.383352 3.404173 3.639117 3.681864 3.684274 3.694475
## [15] 3.826683 3.832320 3.837611 3.841204 3.862057 4.008267 4.122496
## [22] 4.127543 4.192953 4.206136 4.277356 4.307256 4.350595 4.361507
## [29] 4.388059 4.391348 4.411623 4.465049 4.476592 4.496105 4.508291
## [36] 4.508534 4.510887 4.515009 4.546374 4.558978 4.562307 4.570123
## [43] 4.597573 4.621799 4.630608 4.642392 4.680380 4.681196 4.731325
## [50] 4.766927 4.782249 4.803477 4.814836 4.815900 4.821098 4.824940
## [57] 4.839364 4.853309 4.869806 4.888687 4.895099 4.920121 4.924500
## [64] 4.970021 4.981412 4.985364 5.002359 5.012307 5.031341 5.036695
## [71] 5.037322 5.040377 5.049134 5.059956 5.062036 5.087198 5.088730
## [78] 5.093088 5.133692 5.139749 5.160141 5.180651 5.189872 5.193554
## [85] 5.197480 5.204326 5.236154 5.239599 5.243999 5.252909 5.257710
## [92] 5.267747 5.294264 5.297290 5.307405 5.317649 5.338654 5.338988
## [99] 5.351677 5.389814 5.389831 5.395811 5.454811 5.468569 5.488709
## [106] 5.495506 5.502559 5.504139 5.587883 5.599267 5.656008 5.779213
## [113] 5.853471
##
## $TP
## [1] 1.00000000 0.99808355 0.99081669 0.98180926 0.97699993 0.96795363
## [7] 0.90966608 0.90167922 0.89542025 0.90739536 0.90496765 0.89851135
## [13] 0.89206969 0.90431716 0.89864196 0.88947214 0.88172635 0.87507995
## [19] 0.87081513 0.86303657 0.87485906 0.86800520 0.81669823 0.82885412
## [25] 0.82237965 0.81524242 0.82778067 0.84123031 0.85570380 0.84666405
## [31] 0.86198677 0.85147473 0.84096270 0.85690715 0.87427836 0.86421309
## [37] 0.85596773 0.84774696 0.83721432 0.82590061 0.81458691 0.77962381
## [43] 0.77686893 0.76845482 0.76006426 0.75093447 0.73972649 0.72936304
## [49] 0.71814207 0.73230711 0.69736217 0.68653888 0.67760286 0.67406610
## [55] 0.63935925 0.63032160 0.63227747 0.64652159 0.63536663 0.62569118
## [61] 0.61388568 0.62815420 0.61583745 0.60560848 0.55123388 0.55191664
## [67] 0.55311746 0.54559679 0.53347242 0.54690454 0.53667559 0.47743997
## [73] 0.46660187 0.47951218 0.49434221 0.50076289 0.42695373 0.41713194
## [79] 0.37856270 0.39297290 0.38106463 0.38476778 0.37483396 0.36233950
## [85] 0.35237412 0.33978933 0.34253275 0.32935841 0.27725217 0.28866832
## [91] 0.27915023 0.29209269 0.23592952 0.22413305 0.21233657 0.20054009
## [97] 0.19213391 0.15838625 0.15610880 0.14723025 0.11073728 0.10150917
## [103] 0.10766124 0.11627414 0.10335479 0.09043544 0.09689512 0.08074593
## [109] 0.04306450 0.04844756 0.06459675 0.03229837 0.00000000
##
## $FP
## [1] 1.00000000 0.95310893 0.94388689 0.93530610 0.92517866 0.91661219
## [7] 0.92618705 0.91723027 0.90763690 0.89132578 0.88032091 0.87080024
## [13] 0.86127418 0.84486272 0.83505428 0.82653332 0.81748772 0.80803709
## [19] 0.79770904 0.78867552 0.77242062 0.76304642 0.77004952 0.75367180
## [25] 0.74415782 0.73488802 0.71836943 0.70151507 0.68428350 0.67571462
## [31] 0.65817018 0.65014372 0.64211725 0.62434376 0.60604462 0.59785356
## [37] 0.58899202 0.58012141 0.57210253 0.56437142 0.55664030 0.55762203
## [43] 0.54673770 0.53793832 0.52913027 0.52059456 0.51282449 0.50474329
## [49] 0.49697800 0.47986007 0.48083512 0.47292332 0.46431623 0.45371995
## [55] 0.45460727 0.44603762 0.43341776 0.41627070 0.40848110 0.40014642
## [61] 0.39259649 0.37544044 0.36807886 0.35994811 0.36808138 0.35593055
## [67] 0.34358887 0.33446033 0.32702788 0.31017997 0.30204921 0.31197336
## [73] 0.30406703 0.28741136 0.27004844 0.25578367 0.27107696 0.26279620
## [79] 0.26510650 0.24789825 0.24038618 0.22712259 0.21888310 0.21158700
## [85] 0.20335914 0.19609631 0.18318631 0.17614068 0.18343824 0.16733305
## [91] 0.15894040 0.14227289 0.15106510 0.14351184 0.13595859 0.12840533
## [97] 0.11960303 0.12013698 0.10907675 0.10044848 0.10199384 0.09349435
## [103] 0.07932854 0.06425612 0.05711655 0.04997698 0.03569784 0.02974820
## [109] 0.03173142 0.01784892 0.00000000 0.00000000 0.00000000
##
## $predict.time
## [1] 5
##
## $Survival
## [1] 0.7307712
##
## $AUC
## [1] 0.6342068
plot(Marker$FP, Marker$TP, ## x=FP,y=TP
type="l",col="#BC3C29FF", ##线条设置
xlim=c(0,1), ylim=c(0,1),
xlab=("1-specificity"), ##连接
ylab="sensitivity",
main="5 year ROC by CCL14")## \n换行符
abline(0,1,col="gray",lty=2)##线条颜色
## legend可指定位置或"bottomright"等参数
legend(0.6,0.2,paste("5 year AUC =",round(Marker$AUC,3)),
x.intersp=1, y.intersp=0.8,
lty= 1 ,lwd= 2,col=c("#BC3C29FF","#0072B5FF"),
bty = "n",# bty框的类型
seg.len=1,cex=0.8)#

for循环批量绘制
- 先绘制单个图形,再根据单个图形批量绘制
- 重要的是如何在tibble中按列名索引
- 法1:[[colname]]两个中括号中加列名
- 法2:先提取,再专为矩阵,再转为数值,as.numeric(as.matrix())
cutoff<-5
head(mydata)
## # A tibble: 6 x 22
## ID group CCL14 HBA1 CCL16 TUBB3 PAM50 Os_time OS_event RFS_time
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <int> <int>
## 1 TCGA~ TNBC 3.37 0 0 5.94 Basal 2.65 1 NA
## 2 TCGA~ TNBC 4.97 3.39 0 6.00 Basal 1.6 0 NA
## 3 TCGA~ TNBC 4.77 0 0 5.04 Basal 7.27 0 NA
## 4 TCGA~ TNBC 4.19 3.74 2.84 5.13 Basal 2.07 1 NA
## 5 TCGA~ TNBC 0 0 0 5.65 Basal 5.61 0 2048
## 6 TCGA~ TNBC 5.34 0 4.07 6.04 Basal 2.81 0 1027
## # ... with 12 more variables: RFS_event <int>, age <int>, ER <fct>,
## # PR <fct>, gender <fct>, HER2 <fct>, Margin_status <fct>, Node <int>,
## # M_stage <fct>, N_stage <fct>, T_stage <fct>, `Pathologic stage` <fct>
for (i in colnames(mydata)[3:6]) {
Marker[[i]] =survivalROC(Stime=mydata$Os_time,
status=mydata$OS_event,
marker = mydata[[i]],## 在tibble中获取列值的方法
predict.time = cutoff, method="KM")
##
plot(Marker[[i]][[3]], Marker[[i]][[2]], ## x=FP,y=TP
type="l",col="#BC3C29FF", ##线条设置
xlim=c(0,1), ylim=c(0,1),
lwd=2,
xlab=("1-specificity"), ##连接
ylab="sensitivity",
main= paste("5 year ROC by ",i,sep=""))## \n换行符
abline(0,1,lwd=2,col="gray",lty=2)##线条颜色
## legend可指定位置或"bottomright"等参数
legend(0.6,0.2,paste("5 year AUC =",round(Marker[[i]][[6]],3)),
x.intersp=1, y.intersp=0.8,
lty= 1 ,lwd= 2,col=c("#BC3C29FF","#0072B5FF"),
bty = "n",# bty框的类型
seg.len=1,cex=0.8)#
}




包装为函数
- 输入感兴趣的基因名,输出ROC曲线
- 绘制任意时间,任意基因的ROC曲线
require(customLayout)
## Loading required package: customLayout
## Warning: package 'customLayout' was built under R version 3.6.1
require(gridExtra)
## Loading required package: gridExtra
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
head(mydata)
## # A tibble: 6 x 22
## ID group CCL14 HBA1 CCL16 TUBB3 PAM50 Os_time OS_event RFS_time
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <int> <int>
## 1 TCGA~ TNBC 3.37 0 0 5.94 Basal 2.65 1 NA
## 2 TCGA~ TNBC 4.97 3.39 0 6.00 Basal 1.6 0 NA
## 3 TCGA~ TNBC 4.77 0 0 5.04 Basal 7.27 0 NA
## 4 TCGA~ TNBC 4.19 3.74 2.84 5.13 Basal 2.07 1 NA
## 5 TCGA~ TNBC 0 0 0 5.65 Basal 5.61 0 2048
## 6 TCGA~ TNBC 5.34 0 4.07 6.04 Basal 2.81 0 1027
## # ... with 12 more variables: RFS_event <int>, age <int>, ER <fct>,
## # PR <fct>, gender <fct>, HER2 <fct>, Margin_status <fct>, Node <int>,
## # M_stage <fct>, N_stage <fct>, T_stage <fct>, `Pathologic stage` <fct>
gene_ROC<-function(i="CCL14",data=mydata,cutoff=5){
Marker[[i]] =survivalROC(Stime=mydata$Os_time,
status=mydata$OS_event,
marker = mydata[[i]],## 在tibble中获取列值的方法
predict.time = cutoff, method="KM")
## 绘图
plot(Marker[[i]][[3]], Marker[[i]][[2]], ## x=FP,y=TP
type="l",col="#BC3C29FF", ##线条设置
xlim=c(0,1), ylim=c(0,1),
lwd=2,
xlab=("1-specificity"), ##连接
ylab="sensitivity",
main= paste(cutoff,"year ROC by",i,sep=" "))## \n换行符
abline(0,1,lwd=2,col="gray",lty=2)##线条颜色
## legend可指定位置或"bottomright"等参数
legend(0.6,0.2,paste(cutoff, " year AUC =",round(Marker[[i]][[6]],3)),
x.intersp=1, y.intersp=0.8,
lty= 1 ,lwd= 2,col=c("#BC3C29FF","#0072B5FF"),
bty = "n",# bty框的类型
seg.len=1,cex=0.8)#
}
## 绘制CCL16, 3年AUC
gene_ROC(i="CCL16",cutoff = 6)

## 批量绘制
name<-colnames(mydata)[3:6]
## 组图2*2
par(mfrow=c(2,2))
lapply(name,gene_ROC,cutoff=10)

## [[1]]
## [[1]]$rect
## [[1]]$rect$w
## [1] 0.6167995
##
## [[1]]$rect$h
## [1] 0.2653816
##
## [[1]]$rect$left
## [1] 0.6
##
## [[1]]$rect$top
## [1] 0.2
##
##
## [[1]]$text
## [[1]]$text$x
## [1] 0.7306071
##
## [[1]]$text$y
## [1] 0.06730918
##
##
##
## [[2]]
## [[2]]$rect
## [[2]]$rect$w
## [1] 0.6167995
##
## [[2]]$rect$h
## [1] 0.2653816
##
## [[2]]$rect$left
## [1] 0.6
##
## [[2]]$rect$top
## [1] 0.2
##
##
## [[2]]$text
## [[2]]$text$x
## [1] 0.7306071
##
## [[2]]$text$y
## [1] 0.06730918
##
##
##
## [[3]]
## [[3]]$rect
## [[3]]$rect$w
## [1] 0.6167995
##
## [[3]]$rect$h
## [1] 0.2653816
##
## [[3]]$rect$left
## [1] 0.6
##
## [[3]]$rect$top
## [1] 0.2
##
##
## [[3]]$text
## [[3]]$text$x
## [1] 0.7306071
##
## [[3]]$text$y
## [1] 0.06730918
##
##
##
## [[4]]
## [[4]]$rect
## [[4]]$rect$w
## [1] 0.6167995
##
## [[4]]$rect$h
## [1] 0.2653816
##
## [[4]]$rect$left
## [1] 0.6
##
## [[4]]$rect$top
## [1] 0.2
##
##
## [[4]]$text
## [[4]]$text$x
## [1] 0.7306071
##
## [[4]]$text$y
## [1] 0.06730918
多基因联合诊断
- 多基因联合诊断需要先构建模型,计算多基因联合评分后
- 之后都是相同的操作
## 首先构建模型
genes<-colnames(mydata)[3:6]
FML <- as.formula(paste0('OS_event~',paste(genes,collapse = '+')))
FML
## OS_event ~ CCL14 + HBA1 + CCL16 + TUBB3
multifit<-glm(FML,family=binomial(), data = mydata)
multifit
##
## Call: glm(formula = FML, family = binomial(), data = mydata)
##
## Coefficients:
## (Intercept) CCL14 HBA1 CCL16 TUBB3
## -7.3255 0.2712 0.1565 0.3302 0.5422
##
## Degrees of Freedom: 114 Total (i.e. Null); 110 Residual
## Null Deviance: 99.79
## Residual Deviance: 92.66 AIC: 102.7
## 计算出一个综合评分
mydata$Comb<-predict.glm(multifit,type = "link")
## 用之前构建好的函数计算
gene_ROC(i="Comb",cutoff = 6)

将多个基因的ROC曲线绘制在一起
- 输入感兴趣的基因自动绘制多个曲线在一起
- plotROC发基于ggplot2美化
先以单个基因绘制
head(mydata)
## # A tibble: 6 x 23
## ID group CCL14 HBA1 CCL16 TUBB3 PAM50 Os_time OS_event RFS_time
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <int> <int>
## 1 TCGA~ TNBC 3.37 0 0 5.94 Basal 2.65 1 NA
## 2 TCGA~ TNBC 4.97 3.39 0 6.00 Basal 1.6 0 NA
## 3 TCGA~ TNBC 4.77 0 0 5.04 Basal 7.27 0 NA
## 4 TCGA~ TNBC 4.19 3.74 2.84 5.13 Basal 2.07 1 NA
## 5 TCGA~ TNBC 0 0 0 5.65 Basal 5.61 0 2048
## 6 TCGA~ TNBC 5.34 0 4.07 6.04 Basal 2.81 0 1027
## # ... with 13 more variables: RFS_event <int>, age <int>, ER <fct>,
## # PR <fct>, gender <fct>, HER2 <fct>, Margin_status <fct>, Node <int>,
## # M_stage <fct>, N_stage <fct>, T_stage <fct>, `Pathologic stage` <fct>,
## # Comb <dbl>
## 先绘制单个基因
stroc <- survivalROC(Stime = mydata$Os_time, status = mydata$OS_event,
marker = mydata$CCL14,
predict.time = 5, method = "NNE", ## KM法或NNE法
span = .25 * 350^(-.2))
## 提取出绘图需要的数据
stroclong<-data.frame(TPF = stroc[["TP"]], FPF = stroc[["FP"]],
c = stroc[["cut.values"]],
time = rep(stroc[["predict.time"]], length(stroc[["FP"]])))
head(stroclong)
## TPF FPF c time
## 1 1.0000000 1.0000000 -Inf 5
## 2 0.9659247 0.9649750 0.000000 5
## 3 0.9574059 0.9562188 2.764371 5
## 4 0.9488871 0.9474626 2.924143 5
## 5 0.9420720 0.9381226 3.013492 5
## 6 0.9352570 0.9287826 3.368869 5
pROC<-ggplot(stroclong, aes(x = FPF, y = TPF,label=c,color = factor(time))) +
geom_roc(labels = FALSE, stat = "identity") +
style_roc()+
ggsci::scale_color_jama()
pROC

calc_auc(pROC)$AUC
## [1] -0.652296
## 修改图例标题并增加AUC
pROC+scale_color_hue(guide=guide_legend(title = "Time"),labels="5 year")+
annotate("text",x = .75, y = .25, ## 注释text的位置
label = paste("AUC of 5 years =", round(-calc_auc(pROC)$AUC[1], 2)))+
ggtitle("5 year ROC by CCL14")
## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.

plotROC绘制感兴趣的某个ROC
gene_roc<-function(i,data=mydata,cutoff=5){
stroc <- survivalROC(Stime = data$Os_time, status = mydata$OS_event,
marker = data[[i]],
predict.time = cutoff, method = "NNE", ## KM法或NNE法
span = .25 * 350^(-.2))
## 提取出绘图需要的数据
stroclong<-data.frame(TPF = stroc[["TP"]], FPF = stroc[["FP"]],
c = stroc[["cut.values"]],
time = rep(stroc[["predict.time"]], length(stroc[["FP"]])))
head(stroclong)
pROC<-ggplot(stroclong, aes(x = FPF, y = TPF,label=c,color = factor(time))) +
geom_roc(labels = FALSE, stat = "identity") +
style_roc()+
ggsci::scale_color_jama()
pROC
calc_auc(pROC)$AUC
## 修改图例标题并增加AUC
pROC+ scale_color_hue(guide=guide_legend(title = "Time"),labels=paste(cutoff,"years"))+
annotate("text",x = .75, y = .25, ## 注释text的位置
label = paste("AUC of",i,"=",round(-calc_auc(pROC)$AUC[1], 2)),sep=" ")+
ggtitle(paste(cutoff,"years ROC by",i,sep = " "))
}
## 输入感兴趣的基因
gene_roc(i="CCL16")

## 同理可批量绘制
p<-lapply(name,gene_roc,cutoff=7)
多个曲线绘制到一起
if(F){
sroc <- lapply(name, function(i="CCL14",t=5,Comb=T){
stroc <- survivalROC(Stime = mydata$Os_time, status = mydata$OS_event,
marker = mydata[[i]],
predict.time = t, method = "NNE", ## KM法或NNE法
span = .25 * 350^(-.2))
data.frame(TPF = stroc[["TP"]], FPF = stroc[["FP"]],
c = stroc[["cut.values"]],
time = rep(stroc[["predict.time"]],length(stroc[["FP"]])),
gene = rep(i, length(stroc[["FP"]])))
}
)
## 整合到数据框中
sroclong <- do.call(rbind, sroc)
class(sroclong)
head(sroclong)
if (Comb==T){
FML <- as.formula(paste0('OS_event~',paste(genes,collapse = '+')))
FML
multifit<-glm(FML,family=binomial(), data = mydata)
multifit
## 计算出一个综合评分
mydata$Comb<-predict.glm(multifit,type = "link")
stroc <- survivalROC(Stime = mydata$Os_time, status = mydata$OS_event,
marker = mydata[[Comb]],
predict.time = t, method = "NNE", ## KM法或NNE法
span = .25 * 350^(-.2))
Comb<-data.frame(TPF = stroc[["TP"]], FPF = stroc[["FP"]],
c = stroc[["cut.values"]],
time = rep(stroc[["predict.time"]],length(stroc[["FP"]])),
gene = rep("Comb", length(stroc[["FP"]])))
sroclong<-rbind(Comb,sroclong)}
## 绘图
pROC<-ggplot(stroclong, aes(x = FPF, y = TPF,label=c,color = gene)) +
geom_roc(labels = FALSE, stat = "identity") +
style_roc()+
ggsci::scale_color_jama()+
scale_color_hue(guide=guide_legend(title = "Time"),labels=paste(cutoff,"years"))+
annotate("text", ## 注释text的位置
label = paste("AUC of",i,"=",round(-calc_auc(pROC)$AUC[1], 2)),sep=" ")+
annotate("text", ## 注释text的位置
label = paste("AUC of Comb =",round(-calc_auc(pROC)$AUC[1], 2)),sep=" ")+
ggtitle(paste(t,"years ROC",sep = " "))
}