基于pROC绘制ROC曲线

Sys.setlocale('LC_ALL','C')
## [1] "C"
if(!require("pROC")) install.packages("pROC",update = F,ask = F)
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
if(!require("ggplot2")) install.packages("ggplot2",update = F,ask = F)
## Loading required package: ggplot2
if(!require("ggsci")) install.packages("ggsci",update = F,ask = F)
## Loading required package: ggsci
library(plotROC)
## Warning: package 'plotROC' was built under R version 3.6.1
## 
## Attaching package: 'plotROC'
## The following object is masked from 'package:pROC':
## 
##     ggroc
library(ggplot2)
library(ggsci)
require(survivalROC)
## Loading required package: survivalROC
require(tidyverse)
## Loading required package: tidyverse
## -- Attaching packages ------------------------------------------- tidyverse 1.2.1 --
## <U+221A> tibble  2.1.3     <U+221A> purrr   0.3.2
## <U+221A> tidyr   0.8.3     <U+221A> dplyr   0.8.3
## <U+221A> readr   1.3.1     <U+221A> stringr 1.4.0
## <U+221A> tibble  2.1.3     <U+221A> forcats 0.4.0
## Warning: package 'dplyr' was built under R version 3.6.1
## -- Conflicts ---------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
load(file = "F:/Bioinfor_project/Breast/AS_research/AS/result/hubgene.Rdata")
head(data)
## # A tibble: 6 x 22
##   ID    group CCL14  HBA1 CCL16 TUBB3 PAM50 Os_time OS_event RFS_time
##   <chr> <chr> <dbl> <dbl> <dbl> <dbl> <fct>   <int>    <int>    <int>
## 1 TCGA~ TNBC   3.37  0     0     5.94 Basal     967        1       NA
## 2 TCGA~ TNBC   4.97  3.39  0     6.00 Basal     584        0       NA
## 3 TCGA~ TNBC   4.77  0     0     5.04 Basal    2654        0       NA
## 4 TCGA~ TNBC   4.19  3.74  2.84  5.13 Basal     754        1       NA
## 5 TCGA~ TNBC   0     0     0     5.65 Basal    2048        0     2048
## 6 TCGA~ TNBC   5.34  0     4.07  6.04 Basal    1027        0     1027
## # ... with 12 more variables: RFS_event <int>, age <int>, ER <fct>,
## #   PR <fct>, gender <fct>, HER2 <fct>, Margin_status <fct>, Node <int>,
## #   M_stage <fct>, N_stage <fct>, T_stage <fct>, `Pathologic stage` <fct>
mydata<-data %>% 
  filter(group=="TNBC") %>% 
  mutate(Os_time=Os_time/365)

先绘制单个基因

head(mydata)
## # A tibble: 6 x 22
##   ID    group CCL14  HBA1 CCL16 TUBB3 PAM50 Os_time OS_event RFS_time
##   <chr> <chr> <dbl> <dbl> <dbl> <dbl> <fct>   <dbl>    <int>    <int>
## 1 TCGA~ TNBC   3.37  0     0     5.94 Basal    2.65        1       NA
## 2 TCGA~ TNBC   4.97  3.39  0     6.00 Basal    1.6         0       NA
## 3 TCGA~ TNBC   4.77  0     0     5.04 Basal    7.27        0       NA
## 4 TCGA~ TNBC   4.19  3.74  2.84  5.13 Basal    2.07        1       NA
## 5 TCGA~ TNBC   0     0     0     5.65 Basal    5.61        0     2048
## 6 TCGA~ TNBC   5.34  0     4.07  6.04 Basal    2.81        0     1027
## # ... with 12 more variables: RFS_event <int>, age <int>, ER <fct>,
## #   PR <fct>, gender <fct>, HER2 <fct>, Margin_status <fct>, Node <int>,
## #   M_stage <fct>, N_stage <fct>, T_stage <fct>, `Pathologic stage` <fct>
## 先绘制单个基因
stroc <- survivalROC(Stime = mydata$Os_time, status = mydata$OS_event, 
                     marker = mydata$CCL14, 
                     predict.time = 5, method = "NNE", ## KM法或NNE法
                     span = .25 * 350^(-.2))
## 提取出绘图需要的数据
stroclong<-data.frame(TPF = stroc[["TP"]], FPF = stroc[["FP"]], 
             c = stroc[["cut.values"]], 
             time = rep(stroc[["predict.time"]], length(stroc[["FP"]])))
head(stroclong)
##         TPF       FPF        c time
## 1 1.0000000 1.0000000     -Inf    5
## 2 0.9659247 0.9649750 0.000000    5
## 3 0.9574059 0.9562188 2.764371    5
## 4 0.9488871 0.9474626 2.924143    5
## 5 0.9420720 0.9381226 3.013492    5
## 6 0.9352570 0.9287826 3.368869    5
pROC<-ggplot(stroclong, aes(x = FPF, y = TPF,label=c,color = factor(time))) + 
  geom_roc(n.cuts=0,labels = FALSE, stat = "identity")  ## n.cuts设置cut点数
pROC

## 修改图例标题并增加AUC
pROC+theme_bw() +
    geom_abline(intercept = 0, slope = 1, linetype = "dashed")+
    theme(legend.position = c(0.8,0.2),legend.background = element_blank())+
    scale_colour_jco(name="AUC",labels=paste("CCL14 ",round(stroc$AUC,2)))

包装为函数gene_roc

gene_roc<-function(i,data=mydata,cutoff=5){
stroc <- survivalROC(Stime = data$Os_time, status = mydata$OS_event, 
                     marker = data[[i]], 
                     predict.time = cutoff, method = "NNE", ## KM法或NNE法
                     span = .25 * 350^(-.2))
## 提取出绘图需要的数据
stroclong<-data.frame(TPF = stroc[["TP"]], FPF = stroc[["FP"]], 
             c = stroc[["cut.values"]], 
             time = rep(stroc[["predict.time"]], length(stroc[["FP"]])))
head(stroclong)
pROC<-ggplot(stroclong, aes(x = FPF, y = TPF,label=c,color = factor(time))) + 
  geom_roc(n.cuts=0,labels = FALSE, stat = "identity")  ## n.cuts设置cut点数
pROC

## 修改图例标题并增加AUC
pROC+theme_bw()+
    ggtitle(paste(cutoff,"years ROC by",i,sep = " "))+
    geom_abline(intercept = 0, slope = 1, linetype = "dashed")+
    theme(legend.position = c(0.8,0.2),legend.background = element_blank())+
    scale_colour_lancet(name="AUC",labels=paste(i, ":" ,round(stroc$AUC,2),sep=" "))
}
## 输入感兴趣的基因
gene_roc(i="CCL16")

将多个ROC曲线绘制到一起

genes<-colnames(mydata)[3:6]
sroc <- lapply(genes, function(i){
  stroc <- survivalROC(Stime =mydata$Os_time, status =mydata$OS_event, 
                       marker = mydata[[i]],
                       predict.time = 5, method = "NNE", ## KM法或NNE法
                       span = .25 * 350^(-.2))
  data.frame(TPF = stroc[["TP"]], FPF = stroc[["FP"]], 
             c = stroc[["cut.values"]], 
             gene = rep(i, length(stroc[["FP"]])))
})
sroclong <- do.call(rbind, sroc)
class(sroclong)
## [1] "data.frame"
head(sroclong)
##         TPF       FPF        c  gene
## 1 1.0000000 1.0000000     -Inf CCL14
## 2 0.9659247 0.9649750 0.000000 CCL14
## 3 0.9574059 0.9562188 2.764371 CCL14
## 4 0.9488871 0.9474626 2.924143 CCL14
## 5 0.9420720 0.9381226 3.013492 CCL14
## 6 0.9352570 0.9287826 3.368869 CCL14
## 整合到数据框中
sroclong <- do.call(rbind, sroc)
class(sroclong)
## [1] "data.frame"
head(sroclong)
##         TPF       FPF        c  gene
## 1 1.0000000 1.0000000     -Inf CCL14
## 2 0.9659247 0.9649750 0.000000 CCL14
## 3 0.9574059 0.9562188 2.764371 CCL14
## 4 0.9488871 0.9474626 2.924143 CCL14
## 5 0.9420720 0.9381226 3.013492 CCL14
## 6 0.9352570 0.9287826 3.368869 CCL14
## 索引找出各个AUC值命名为lab
lab<-c()
for (i in 1:length(genes)){
 stroc[[i]] <- survivalROC(Stime =mydata$Os_time, status =mydata$OS_event, 
                       marker = mydata[[i+2]],##marker
                       predict.time = 5, ##时间
                       method = "NNE", ## KM法或NNE法
                       span = .25 * 350^(-.2))
 ## 索引构建lab
 lab[i]<-paste(genes[i],":",round(stroc[[i]][[6]],3),sep = " ")
 
 }

### 绘制多基因图
pROC<-ggplot(sroclong, aes(x = FPF, y = TPF,label=c,color = gene)) + 
  geom_roc(n.cuts=0,labels = FALSE, stat = "identity")  ## n.cuts设置cut点数
pROC+theme_bw()+
    xlab("1-specificity")+ylab("sensitivity")+ #坐标轴标签
    ggtitle("5 years ROC curve")+
    geom_abline(intercept = 0, slope = 1, linetype = "dashed")+
    theme(legend.position = c(0.8,0.2),legend.background = element_blank())+
    scale_colour_lancet(name="AUC",labels=lab)

包装为函数Gene_roc

Gene_roc<-function(genes=c("CCL16","CCL14","HBA1"),data=mydata,t=5,combine=T){
  ## 增加combine函数参数,将combine当成一个基因 
  if (combine==T){
    FML <- as.formula(paste0('OS_event~',paste(genes,collapse = '+')))
    multifit<-glm(FML,family=binomial(), data = data)
    data$combine<-predict.glm(multifit,type = "link")
    genes<-c(genes,"combine")
  }
  ## 将基因放到前面
  if(!require("tidyverse")) install.packages("tidyverse",update = F,ask = F)
  data<-data %>% select(genes,everything())
  ##
  sroc <- lapply(genes, function(i){
  stroc <- survivalROC(Stime =data$Os_time, status =data$OS_event, 
                       marker =data[[i]],
                       predict.time = t, method = "NNE", ## KM法或NNE法
                       span = .25 * 350^(-.2))
  data.frame(TPF = stroc[["TP"]], FPF = stroc[["FP"]], 
             c = stroc[["cut.values"]], 
             gene = rep(i, length(stroc[["FP"]])))
})
sroclong <- do.call(rbind, sroc)
class(sroclong)
head(sroclong)

## 整合到数据框中
sroclong <- do.call(rbind, sroc)
class(sroclong)
head(sroclong)

## 索引找出各个AUC值命名为lab
lab<-c()
for (i in 1:length(genes)){
 stroc[[i]] <- survivalROC(Stime =data$Os_time, status =data$OS_event, 
                       marker = data[[i]],##marker
                       predict.time = t, ##时间
                       method = "NNE", ## KM法或NNE法
                       span = .25 * 350^(-.2))
 ## 索引构建lab
 lab[i]<-paste(genes[i],":",round(stroc[[i]][[6]],3),sep = " ")
 
 }

### 绘制多基因图
pROC<-ggplot(sroclong, aes(x = FPF, y = TPF,label=c,color = gene)) + 
  geom_roc(n.cuts=0,labels = FALSE, stat = "identity")  ## n.cuts设置cut点数
pROC+theme_bw()+
    xlab("1-specificity")+ylab("sensitivity")+ #坐标轴标签
    ggtitle(paste(t,"years ROC curve",sep = " "))+
    geom_abline(intercept = 0, slope = 1, linetype = "dashed")+
    theme(legend.position = c(0.8,0.2),legend.background = element_blank())+
    scale_colour_lancet(name="AUC",labels=lab,limits=genes)

}
## 绘制感兴趣的多个基因的五年ROC,并combine
Gene_roc(genes = genes,data=mydata,t=8,combine = T)

绘制某个基因个时间点的ROC曲线

head(mydata)
## # A tibble: 6 x 22
##   ID    group CCL14  HBA1 CCL16 TUBB3 PAM50 Os_time OS_event RFS_time
##   <chr> <chr> <dbl> <dbl> <dbl> <dbl> <fct>   <dbl>    <int>    <int>
## 1 TCGA~ TNBC   3.37  0     0     5.94 Basal    2.65        1       NA
## 2 TCGA~ TNBC   4.97  3.39  0     6.00 Basal    1.6         0       NA
## 3 TCGA~ TNBC   4.77  0     0     5.04 Basal    7.27        0       NA
## 4 TCGA~ TNBC   4.19  3.74  2.84  5.13 Basal    2.07        1       NA
## 5 TCGA~ TNBC   0     0     0     5.65 Basal    5.61        0     2048
## 6 TCGA~ TNBC   5.34  0     4.07  6.04 Basal    2.81        0     1027
## # ... with 12 more variables: RFS_event <int>, age <int>, ER <fct>,
## #   PR <fct>, gender <fct>, HER2 <fct>, Margin_status <fct>, Node <int>,
## #   M_stage <fct>, N_stage <fct>, T_stage <fct>, `Pathologic stage` <fct>
### 时间2,5,10
t<-c(3,5,10)
sroc <- lapply(t, function(t){
  stroc <- survivalROC(Stime = mydata$Os_time, status = mydata$OS_event, 
                       marker = mydata$CCL14, 
                       predict.time = t, method = "NNE", ## KM法或NNE法
                       span = .25 * 350^(-.2))
  data.frame(TPF = stroc[["TP"]], FPF = stroc[["FP"]], 
             c = stroc[["cut.values"]], 
             time = rep(stroc[["predict.time"]], length(stroc[["FP"]])))
})

## 整合到数据框中
sroclong <- do.call(rbind, sroc)
class(sroclong)
## [1] "data.frame"
head(sroclong)
##         TPF       FPF        c time
## 1 1.0000000 1.0000000     -Inf    3
## 2 0.9445458 0.9690617 0.000000    3
## 3 0.9306822 0.9613271 2.764371    3
## 4 0.9168186 0.9535925 2.924143    3
## 5 0.9057278 0.9453423 3.013492    3
## 6 0.8946369 0.9370921 3.368869    3
## 索引找出各个AUC值命名为lab

lab<-c()
for (i in 1:length(t)){
 stroc[[i]] <- survivalROC(Stime =mydata$Os_time, status =mydata$OS_event, 
                       marker = mydata$CCL14,##marker
                       predict.time = t[i], ##时间
                       method = "NNE", ## KM法或NNE法
                       span = .25 * 350^(-.2))
 ## 索引构建lab
 lab[i]<-paste(t[i],"years :",round(stroc[[i]][[6]],3),sep = " ")
 
 }

##绘图
pROC<-ggplot(sroclong, aes(x = FPF, y = TPF,label=c,color = factor(time))) + 
  geom_roc(n.cuts=0,labels = FALSE, stat = "identity")  ## n.cuts设置cut点数
pROC+theme_bw()+
    xlab("1-specificity")+ylab("sensitivity")+ #坐标轴标签
    #ggtitle(paste(t,"years ROC curve",sep = " "))+
    geom_abline(intercept = 0, slope = 1, linetype = "dashed")+
    theme(legend.position = c(0.8,0.2),legend.background = element_blank())+
    scale_colour_lancet(name="AUC",labels=lab)

包装为函数t_roc

t_roc<-function(gene="CCL14",t=c(3,5,10),data=mydata){
### 时间3,5,10
sroc <- lapply(t, function(t){
  stroc <- survivalROC(Stime = data$Os_time, status = data$OS_event, 
                       marker = data[[gene]], 
                       predict.time = t, method = "NNE", ## KM法或NNE法
                       span = .25 * 350^(-.2))
  data.frame(TPF = stroc[["TP"]], FPF = stroc[["FP"]], 
             c = stroc[["cut.values"]], 
             time = rep(stroc[["predict.time"]], length(stroc[["FP"]])))
})

## 整合到数据框中
sroclong <- do.call(rbind, sroc)
class(sroclong)
head(sroclong)

## 索引找出各个AUC值命名为lab
lab<-c()
for (i in 1:length(t)){
 stroc[[i]] <- survivalROC(Stime =data$Os_time, status =data$OS_event, 
                       marker = data[[gene]] ,##marker
                       predict.time = t[i], ##时间
                       method = "NNE", ## KM法或NNE法
                       span = .25 * 350^(-.2))
 ## 索引构建lab
 lab[i]<-paste(t[i],"years :",round(stroc[[i]][[6]],3),sep = " ")
  }

##绘图
pROC<-ggplot(sroclong, aes(x = FPF, y = TPF,label=c,color = factor(time))) + 
  geom_roc(n.cuts=0,labels = FALSE, stat = "identity")  ## n.cuts设置cut点数
pROC+theme_bw()+
    xlab("1-specificity")+ylab("sensitivity")+ #坐标轴标签
    ggtitle(paste("Time dependent ROC curve of",gene,sep = " "))+
    geom_abline(intercept = 0, slope = 1, linetype = "dashed")+
    ##调整位置适应
    theme(legend.position = c(0.8,0.3),## 设置legend的位置
    legend.background = element_blank())+
    scale_colour_lancet(name="AUC",labels=lab)
}
## 默认参数绘图
t_roc()

设置参数

t_roc(gene="CCL16",t=c(2:10))