基于pROC绘制ROC曲线
Sys.setlocale('LC_ALL','C')
## [1] "C"
if(!require("pROC")) install.packages("pROC",update = F,ask = F)
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
if(!require("ggplot2")) install.packages("ggplot2",update = F,ask = F)
## Loading required package: ggplot2
if(!require("ggsci")) install.packages("ggsci",update = F,ask = F)
## Loading required package: ggsci
library(plotROC)
## Warning: package 'plotROC' was built under R version 3.6.1
##
## Attaching package: 'plotROC'
## The following object is masked from 'package:pROC':
##
## ggroc
library(ggplot2)
library(ggsci)
require(survivalROC)
## Loading required package: survivalROC
require(tidyverse)
## Loading required package: tidyverse
## -- Attaching packages ------------------------------------------- tidyverse 1.2.1 --
## <U+221A> tibble 2.1.3 <U+221A> purrr 0.3.2
## <U+221A> tidyr 0.8.3 <U+221A> dplyr 0.8.3
## <U+221A> readr 1.3.1 <U+221A> stringr 1.4.0
## <U+221A> tibble 2.1.3 <U+221A> forcats 0.4.0
## Warning: package 'dplyr' was built under R version 3.6.1
## -- Conflicts ---------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
load(file = "F:/Bioinfor_project/Breast/AS_research/AS/result/hubgene.Rdata")
head(data)
## # A tibble: 6 x 22
## ID group CCL14 HBA1 CCL16 TUBB3 PAM50 Os_time OS_event RFS_time
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <fct> <int> <int> <int>
## 1 TCGA~ TNBC 3.37 0 0 5.94 Basal 967 1 NA
## 2 TCGA~ TNBC 4.97 3.39 0 6.00 Basal 584 0 NA
## 3 TCGA~ TNBC 4.77 0 0 5.04 Basal 2654 0 NA
## 4 TCGA~ TNBC 4.19 3.74 2.84 5.13 Basal 754 1 NA
## 5 TCGA~ TNBC 0 0 0 5.65 Basal 2048 0 2048
## 6 TCGA~ TNBC 5.34 0 4.07 6.04 Basal 1027 0 1027
## # ... with 12 more variables: RFS_event <int>, age <int>, ER <fct>,
## # PR <fct>, gender <fct>, HER2 <fct>, Margin_status <fct>, Node <int>,
## # M_stage <fct>, N_stage <fct>, T_stage <fct>, `Pathologic stage` <fct>
mydata<-data %>%
filter(group=="TNBC") %>%
mutate(Os_time=Os_time/365)
先绘制单个基因
head(mydata)
## # A tibble: 6 x 22
## ID group CCL14 HBA1 CCL16 TUBB3 PAM50 Os_time OS_event RFS_time
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <int> <int>
## 1 TCGA~ TNBC 3.37 0 0 5.94 Basal 2.65 1 NA
## 2 TCGA~ TNBC 4.97 3.39 0 6.00 Basal 1.6 0 NA
## 3 TCGA~ TNBC 4.77 0 0 5.04 Basal 7.27 0 NA
## 4 TCGA~ TNBC 4.19 3.74 2.84 5.13 Basal 2.07 1 NA
## 5 TCGA~ TNBC 0 0 0 5.65 Basal 5.61 0 2048
## 6 TCGA~ TNBC 5.34 0 4.07 6.04 Basal 2.81 0 1027
## # ... with 12 more variables: RFS_event <int>, age <int>, ER <fct>,
## # PR <fct>, gender <fct>, HER2 <fct>, Margin_status <fct>, Node <int>,
## # M_stage <fct>, N_stage <fct>, T_stage <fct>, `Pathologic stage` <fct>
## 先绘制单个基因
stroc <- survivalROC(Stime = mydata$Os_time, status = mydata$OS_event,
marker = mydata$CCL14,
predict.time = 5, method = "NNE", ## KM法或NNE法
span = .25 * 350^(-.2))
## 提取出绘图需要的数据
stroclong<-data.frame(TPF = stroc[["TP"]], FPF = stroc[["FP"]],
c = stroc[["cut.values"]],
time = rep(stroc[["predict.time"]], length(stroc[["FP"]])))
head(stroclong)
## TPF FPF c time
## 1 1.0000000 1.0000000 -Inf 5
## 2 0.9659247 0.9649750 0.000000 5
## 3 0.9574059 0.9562188 2.764371 5
## 4 0.9488871 0.9474626 2.924143 5
## 5 0.9420720 0.9381226 3.013492 5
## 6 0.9352570 0.9287826 3.368869 5
pROC<-ggplot(stroclong, aes(x = FPF, y = TPF,label=c,color = factor(time))) +
geom_roc(n.cuts=0,labels = FALSE, stat = "identity") ## n.cuts设置cut点数
pROC

## 修改图例标题并增加AUC
pROC+theme_bw() +
geom_abline(intercept = 0, slope = 1, linetype = "dashed")+
theme(legend.position = c(0.8,0.2),legend.background = element_blank())+
scale_colour_jco(name="AUC",labels=paste("CCL14 ",round(stroc$AUC,2)))

包装为函数gene_roc
- 绘制感兴趣的基因的ROC曲线
- 绘制结果如上图
- 达成目的:绘制任意基因,任意时间的ROC曲线
gene_roc<-function(i,data=mydata,cutoff=5){
stroc <- survivalROC(Stime = data$Os_time, status = mydata$OS_event,
marker = data[[i]],
predict.time = cutoff, method = "NNE", ## KM法或NNE法
span = .25 * 350^(-.2))
## 提取出绘图需要的数据
stroclong<-data.frame(TPF = stroc[["TP"]], FPF = stroc[["FP"]],
c = stroc[["cut.values"]],
time = rep(stroc[["predict.time"]], length(stroc[["FP"]])))
head(stroclong)
pROC<-ggplot(stroclong, aes(x = FPF, y = TPF,label=c,color = factor(time))) +
geom_roc(n.cuts=0,labels = FALSE, stat = "identity") ## n.cuts设置cut点数
pROC
## 修改图例标题并增加AUC
pROC+theme_bw()+
ggtitle(paste(cutoff,"years ROC by",i,sep = " "))+
geom_abline(intercept = 0, slope = 1, linetype = "dashed")+
theme(legend.position = c(0.8,0.2),legend.background = element_blank())+
scale_colour_lancet(name="AUC",labels=paste(i, ":" ,round(stroc$AUC,2),sep=" "))
}
## 输入感兴趣的基因
gene_roc(i="CCL16")

将多个ROC曲线绘制到一起
genes<-colnames(mydata)[3:6]
sroc <- lapply(genes, function(i){
stroc <- survivalROC(Stime =mydata$Os_time, status =mydata$OS_event,
marker = mydata[[i]],
predict.time = 5, method = "NNE", ## KM法或NNE法
span = .25 * 350^(-.2))
data.frame(TPF = stroc[["TP"]], FPF = stroc[["FP"]],
c = stroc[["cut.values"]],
gene = rep(i, length(stroc[["FP"]])))
})
sroclong <- do.call(rbind, sroc)
class(sroclong)
## [1] "data.frame"
head(sroclong)
## TPF FPF c gene
## 1 1.0000000 1.0000000 -Inf CCL14
## 2 0.9659247 0.9649750 0.000000 CCL14
## 3 0.9574059 0.9562188 2.764371 CCL14
## 4 0.9488871 0.9474626 2.924143 CCL14
## 5 0.9420720 0.9381226 3.013492 CCL14
## 6 0.9352570 0.9287826 3.368869 CCL14
## 整合到数据框中
sroclong <- do.call(rbind, sroc)
class(sroclong)
## [1] "data.frame"
head(sroclong)
## TPF FPF c gene
## 1 1.0000000 1.0000000 -Inf CCL14
## 2 0.9659247 0.9649750 0.000000 CCL14
## 3 0.9574059 0.9562188 2.764371 CCL14
## 4 0.9488871 0.9474626 2.924143 CCL14
## 5 0.9420720 0.9381226 3.013492 CCL14
## 6 0.9352570 0.9287826 3.368869 CCL14
## 索引找出各个AUC值命名为lab
lab<-c()
for (i in 1:length(genes)){
stroc[[i]] <- survivalROC(Stime =mydata$Os_time, status =mydata$OS_event,
marker = mydata[[i+2]],##marker
predict.time = 5, ##时间
method = "NNE", ## KM法或NNE法
span = .25 * 350^(-.2))
## 索引构建lab
lab[i]<-paste(genes[i],":",round(stroc[[i]][[6]],3),sep = " ")
}
### 绘制多基因图
pROC<-ggplot(sroclong, aes(x = FPF, y = TPF,label=c,color = gene)) +
geom_roc(n.cuts=0,labels = FALSE, stat = "identity") ## n.cuts设置cut点数
pROC+theme_bw()+
xlab("1-specificity")+ylab("sensitivity")+ #坐标轴标签
ggtitle("5 years ROC curve")+
geom_abline(intercept = 0, slope = 1, linetype = "dashed")+
theme(legend.position = c(0.8,0.2),legend.background = element_blank())+
scale_colour_lancet(name="AUC",labels=lab)

包装为函数Gene_roc
- 达成目的:时间依赖,多因素联合诊断
- 对任意感兴趣的基因
- 对多个感兴趣的基因
- 对感兴趣的时间
- 多基因联合,时间依赖ROC曲线绘制,一键出图
- 参数1:基因
- 参数2:数据集,基因作为列名,或者其它变量(data.frame或tibble)
- 参数3:时间
- 参数4: combine=T,整合多个因素,多因素联合诊断
Gene_roc<-function(genes=c("CCL16","CCL14","HBA1"),data=mydata,t=5,combine=T){
## 增加combine函数参数,将combine当成一个基因
if (combine==T){
FML <- as.formula(paste0('OS_event~',paste(genes,collapse = '+')))
multifit<-glm(FML,family=binomial(), data = data)
data$combine<-predict.glm(multifit,type = "link")
genes<-c(genes,"combine")
}
## 将基因放到前面
if(!require("tidyverse")) install.packages("tidyverse",update = F,ask = F)
data<-data %>% select(genes,everything())
##
sroc <- lapply(genes, function(i){
stroc <- survivalROC(Stime =data$Os_time, status =data$OS_event,
marker =data[[i]],
predict.time = t, method = "NNE", ## KM法或NNE法
span = .25 * 350^(-.2))
data.frame(TPF = stroc[["TP"]], FPF = stroc[["FP"]],
c = stroc[["cut.values"]],
gene = rep(i, length(stroc[["FP"]])))
})
sroclong <- do.call(rbind, sroc)
class(sroclong)
head(sroclong)
## 整合到数据框中
sroclong <- do.call(rbind, sroc)
class(sroclong)
head(sroclong)
## 索引找出各个AUC值命名为lab
lab<-c()
for (i in 1:length(genes)){
stroc[[i]] <- survivalROC(Stime =data$Os_time, status =data$OS_event,
marker = data[[i]],##marker
predict.time = t, ##时间
method = "NNE", ## KM法或NNE法
span = .25 * 350^(-.2))
## 索引构建lab
lab[i]<-paste(genes[i],":",round(stroc[[i]][[6]],3),sep = " ")
}
### 绘制多基因图
pROC<-ggplot(sroclong, aes(x = FPF, y = TPF,label=c,color = gene)) +
geom_roc(n.cuts=0,labels = FALSE, stat = "identity") ## n.cuts设置cut点数
pROC+theme_bw()+
xlab("1-specificity")+ylab("sensitivity")+ #坐标轴标签
ggtitle(paste(t,"years ROC curve",sep = " "))+
geom_abline(intercept = 0, slope = 1, linetype = "dashed")+
theme(legend.position = c(0.8,0.2),legend.background = element_blank())+
scale_colour_lancet(name="AUC",labels=lab,limits=genes)
}
## 绘制感兴趣的多个基因的五年ROC,并combine
Gene_roc(genes = genes,data=mydata,t=8,combine = T)

绘制某个基因个时间点的ROC曲线
head(mydata)
## # A tibble: 6 x 22
## ID group CCL14 HBA1 CCL16 TUBB3 PAM50 Os_time OS_event RFS_time
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <int> <int>
## 1 TCGA~ TNBC 3.37 0 0 5.94 Basal 2.65 1 NA
## 2 TCGA~ TNBC 4.97 3.39 0 6.00 Basal 1.6 0 NA
## 3 TCGA~ TNBC 4.77 0 0 5.04 Basal 7.27 0 NA
## 4 TCGA~ TNBC 4.19 3.74 2.84 5.13 Basal 2.07 1 NA
## 5 TCGA~ TNBC 0 0 0 5.65 Basal 5.61 0 2048
## 6 TCGA~ TNBC 5.34 0 4.07 6.04 Basal 2.81 0 1027
## # ... with 12 more variables: RFS_event <int>, age <int>, ER <fct>,
## # PR <fct>, gender <fct>, HER2 <fct>, Margin_status <fct>, Node <int>,
## # M_stage <fct>, N_stage <fct>, T_stage <fct>, `Pathologic stage` <fct>
### 时间2,5,10
t<-c(3,5,10)
sroc <- lapply(t, function(t){
stroc <- survivalROC(Stime = mydata$Os_time, status = mydata$OS_event,
marker = mydata$CCL14,
predict.time = t, method = "NNE", ## KM法或NNE法
span = .25 * 350^(-.2))
data.frame(TPF = stroc[["TP"]], FPF = stroc[["FP"]],
c = stroc[["cut.values"]],
time = rep(stroc[["predict.time"]], length(stroc[["FP"]])))
})
## 整合到数据框中
sroclong <- do.call(rbind, sroc)
class(sroclong)
## [1] "data.frame"
head(sroclong)
## TPF FPF c time
## 1 1.0000000 1.0000000 -Inf 3
## 2 0.9445458 0.9690617 0.000000 3
## 3 0.9306822 0.9613271 2.764371 3
## 4 0.9168186 0.9535925 2.924143 3
## 5 0.9057278 0.9453423 3.013492 3
## 6 0.8946369 0.9370921 3.368869 3
## 索引找出各个AUC值命名为lab
lab<-c()
for (i in 1:length(t)){
stroc[[i]] <- survivalROC(Stime =mydata$Os_time, status =mydata$OS_event,
marker = mydata$CCL14,##marker
predict.time = t[i], ##时间
method = "NNE", ## KM法或NNE法
span = .25 * 350^(-.2))
## 索引构建lab
lab[i]<-paste(t[i],"years :",round(stroc[[i]][[6]],3),sep = " ")
}
##绘图
pROC<-ggplot(sroclong, aes(x = FPF, y = TPF,label=c,color = factor(time))) +
geom_roc(n.cuts=0,labels = FALSE, stat = "identity") ## n.cuts设置cut点数
pROC+theme_bw()+
xlab("1-specificity")+ylab("sensitivity")+ #坐标轴标签
#ggtitle(paste(t,"years ROC curve",sep = " "))+
geom_abline(intercept = 0, slope = 1, linetype = "dashed")+
theme(legend.position = c(0.8,0.2),legend.background = element_blank())+
scale_colour_lancet(name="AUC",labels=lab)

包装为函数t_roc
- 达成目的:绘制时间依赖的ROC曲线
- 输入基因,绘制多个时间点的ROC曲线
- 输入其它类似于基因的指标,输出ROC曲线
t_roc<-function(gene="CCL14",t=c(3,5,10),data=mydata){
### 时间3,5,10
sroc <- lapply(t, function(t){
stroc <- survivalROC(Stime = data$Os_time, status = data$OS_event,
marker = data[[gene]],
predict.time = t, method = "NNE", ## KM法或NNE法
span = .25 * 350^(-.2))
data.frame(TPF = stroc[["TP"]], FPF = stroc[["FP"]],
c = stroc[["cut.values"]],
time = rep(stroc[["predict.time"]], length(stroc[["FP"]])))
})
## 整合到数据框中
sroclong <- do.call(rbind, sroc)
class(sroclong)
head(sroclong)
## 索引找出各个AUC值命名为lab
lab<-c()
for (i in 1:length(t)){
stroc[[i]] <- survivalROC(Stime =data$Os_time, status =data$OS_event,
marker = data[[gene]] ,##marker
predict.time = t[i], ##时间
method = "NNE", ## KM法或NNE法
span = .25 * 350^(-.2))
## 索引构建lab
lab[i]<-paste(t[i],"years :",round(stroc[[i]][[6]],3),sep = " ")
}
##绘图
pROC<-ggplot(sroclong, aes(x = FPF, y = TPF,label=c,color = factor(time))) +
geom_roc(n.cuts=0,labels = FALSE, stat = "identity") ## n.cuts设置cut点数
pROC+theme_bw()+
xlab("1-specificity")+ylab("sensitivity")+ #坐标轴标签
ggtitle(paste("Time dependent ROC curve of",gene,sep = " "))+
geom_abline(intercept = 0, slope = 1, linetype = "dashed")+
##调整位置适应
theme(legend.position = c(0.8,0.3),## 设置legend的位置
legend.background = element_blank())+
scale_colour_lancet(name="AUC",labels=lab)
}
## 默认参数绘图
t_roc()

设置参数
t_roc(gene="CCL16",t=c(2:10))
