载入数据

require(tidyverse)
## Loading required package: tidyverse
## -- Attaching packages ------------------------------------------- tidyverse 1.2.1 --
## √ ggplot2 3.2.0     √ purrr   0.3.2
## √ tibble  2.1.3     √ dplyr   0.8.3
## √ tidyr   0.8.3     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.4.0
## Warning: package 'dplyr' was built under R version 3.6.1
## -- Conflicts ---------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
load("F:/Bioinfor_project/Breast/AS_research/AS/data/AS_score.Rdata")
head(AS_score)
##                ID        AP        ME        RI       ALL
## 1 TCGA.A1.A0SK.01 -12511197 -18.18653 -417.6976 -313021.2
## 2 TCGA.A2.A04P.01 -12511202 -17.36901 -412.8365 -313013.0
## 3 TCGA.A2.A0CM.01 -12511193 -18.70468 -413.9731 -313018.5
## 4 TCGA.A2.A0T2.01 -12511148 -17.92170 -416.9330 -313006.0
## 5 TCGA.A8.A09X.01 -12511157 -18.41298 -414.5135 -313006.5
## 6 TCGA.AO.A03U.01 -12511258 -17.51236 -418.4836 -313023.7
load("F:/Bioinfor_project/Breast/AS_research/AS/clinical/BRCA_clinical.Rdata")
BRCA_clinical[1:5,1:5]
##              OS_event OS_time ER         PR_Status_nature2012 HER2      
## TCGA_A1_A0SK "1"      " 967"  "Negative" "Negative"           "Negative"
## TCGA_A2_A04P "1"      " 548"  "Negative" "Negative"           "Negative"
## TCGA_A2_A0CM "1"      " 754"  "Negative" "Negative"           "Negative"
## TCGA_A2_A0T2 "1"      " 255"  "Negative" "Negative"           "Negative"
## TCGA_A8_A09X "1"      " 426"  "Negative" "Negative"           "Negative"

数据清洗

## 修改ID
id<-gsub("\\.","_",AS_score$ID) %>% 
  substring(1,12)
AS_score$ID<-id

## 整合AS_score
BRCA_clinical<-as.data.frame(BRCA_clinical,stringsAsFactors=F   
) %>% 
  rownames_to_column(var = "ID") %>% 
  inner_join(AS_score,by="ID") %>% 
  as_tibble()
dim(BRCA_clinical)## 113 24
## [1] 113  24

多因素cox分析AP的独立性

## 对变量整合
BRCA_clinical<-within(BRCA_clinical,{
  T<-factor(T,labels = c("T1/T2","T1/T2","T3/T4","T3/T4"))
  N<-factor(N,labels = c("N0/N1","N0/N1","N2/N3","N2/N3"))
  M<-factor(M,labels = c("M0","M1"))
  #age<-as.numeric(as.character(age))
  age<- factor(ifelse(age>=median(age),">=56","<56"),labels = c(">=56","<56"))
  AP<-factor(ifelse(AP>median(AP),"high","low"),labels = c("high","low"))
  ME<-factor(ifelse(ME>median(ME),"high","low"),labels = c("high","low"))
  RI<-factor(ifelse(RI>median(RI),"high","low"),labels = c("high","low"))
  ALL<-factor(ifelse(ALL>median(ALL),"high","low"),labels = c("high","low"))

})

## 构建多cox函数
multicox<-function(vars=c("T","N","M","age"),data=mydata,forest=T){
   library(survival)
   require(survminer)
   y<-Surv(as.numeric(data[["OS_time"]]),as.numeric(data[["OS_event"]]))  
   ## 构建公式
   FM<-as.formula(paste0("y~",paste(vars,collapse = "+")))
   cox.fit_multi <- coxph(FM,data = data)
   munivar_res<-summary(cox.fit_multi)#cox的结果
   pvalue<-round(munivar_res$coefficients[,"Pr(>|z|)"],3)#pvalue
   HR<-round(munivar_res$coefficients[,"exp(coef)"],2)# HR
   lower<-round(munivar_res$conf.int[,3],2)
   upper<-round(munivar_res$conf.int[,4],2)
   ## 保存结果到数据框中
   munivar_res<-data.frame(
    varname<-vars,
    HR<-as.numeric(HR),
    CI=paste(as.numeric(lower),"-",as.numeric(upper),sep = ""),
    pvalue<-as.numeric(pvalue)
  )
  colnames(munivar_res)<-c("varname","HR","95%CI","pvalue")
  ## 保存pdf到工作目录
  if (forest==T){
  #ggforest(cox.fit_multi,data = data)
  ggsave(filename = "AP_cli_mult_cox.pdf")}
  munivar_res## 最后的函数运行结果
  
}
### 

### AP-无穷-可能与数据相关
res<-multicox(vars=c("AP","T","N","M","age","ME","RI"),data=BRCA_clinical,forest = F)
## Loading required package: survminer
## Loading required package: ggpubr
## Loading required package: magrittr
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
## Warning in fitter(X, Y, strats, offset, init, control, weights = weights, :
## Ran out of iterations and did not converge
res
##   varname   HR      95%CI pvalue
## 1      AP 0.00      0-Inf  0.994
## 2       T 5.15  1.43-18.5  0.012
## 3       N 6.82 2.05-22.67  0.002
## 4       M 1.04 0.09-11.45  0.976
## 5     age 0.70  0.23-2.09  0.519
## 6      ME 0.18  0.04-0.85  0.030
## 7      RI 0.08  0.02-0.38  0.001
#write.csv(res,file = "F:/Bioinfor_project/Breast/AS_research/AS/result/AP_munivar_cox.csv")

数据清洗

BRCA_clinical<-read.csv("F:/Bioinfor_project/Breast/AS_research/AS/clinical/TNBC_clinical_final.csv",header=TRUE,sep=",")
BRCA_clinical[1:5,1:5]
##       sampleID OS_time OS_event  X       ER
## 1 TCGA_A1_A0SK     967        1 NA Negative
## 2 TCGA_A2_A04P     548        1 NA Negative
## 3 TCGA_A2_A0CM     754        1 NA Negative
## 4 TCGA_A2_A0T2     255        1 NA Negative
## 5 TCGA_A8_A09X     426        1 NA Negative
## 修改ID
id<-gsub("\\.","_",AS_score$ID) %>% 
  substring(1,12)
AS_score$ID<-id

## 整合AS_score
BRCA_clinical<-BRCA_clinical %>% 
  rename(ID=sampleID) %>% 
  inner_join(AS_score,by="ID") %>% 
  as_tibble()
## Warning: Column `ID` joining factor and character vector, coercing into
## character vector
dim(BRCA_clinical)## 113 24
## [1] 113  25
BRCA_clinical
## # A tibble: 113 x 25
##    ID    OS_time OS_event X     ER    PR_Status_natur~ HER2  T     Node 
##    <chr>   <int>    <int> <lgl> <fct> <fct>            <fct> <fct> <fct>
##  1 TCGA~     967        1 NA    Nega~ Negative         Nega~ T2    Nega~
##  2 TCGA~     548        1 NA    Nega~ Negative         Nega~ T2    Posi~
##  3 TCGA~     754        1 NA    Nega~ Negative         Nega~ T2    Nega~
##  4 TCGA~     255        1 NA    Nega~ Negative         Nega~ T3    Posi~
##  5 TCGA~     426        1 NA    Nega~ Negative         Nega~ T2    Posi~
##  6 TCGA~    1793        1 NA    Nega~ Negative         Nega~ T1    Nega~
##  7 TCGA~     524        1 NA    Nega~ Negative         Nega~ T1    Posi~
##  8 TCGA~     571        1 NA    Nega~ Negative         Nega~ T4    Posi~
##  9 TCGA~    3063        1 NA    Nega~ Negative         Nega~ T2    Nega~
## 10 TCGA~     639        1 NA    Nega~ Negative         Nega~ T3    Posi~
## # ... with 103 more rows, and 16 more variables: N <fct>,
## #   Metastasis_status <fct>, M <fct>, age <int>, weight <int>,
## #   RFS_time <int>, RFS <int>, menopause_status <fct>, gender <fct>,
## #   patient_id <fct>, radiation_therapy <fct>,
## #   targeted_molecular_therapy <fct>, AP <dbl>, ME <dbl>, RI <dbl>,
## #   ALL <dbl>
## 写一个将因子转数值的函数
BRCA_clinical[["T"]]<-as.numeric(as.factor(BRCA_clinical[["T"]]))
BRCA_clinical[["N"]]<-as.numeric(as.factor(BRCA_clinical[["N"]]))
BRCA_clinical[["M"]]<-as.numeric(as.factor(BRCA_clinical[["M"]]))

###combine,TNM
multifit<-glm(as.numeric(OS_event)~T+N+M,family=binomial(), data = BRCA_clinical)
multifit
## 
## Call:  glm(formula = as.numeric(OS_event) ~ T + N + M, family = binomial(), 
##     data = BRCA_clinical)
## 
## Coefficients:
## (Intercept)            T            N            M  
##    -18.9108       0.3353       0.7062      15.3231  
## 
## Degrees of Freedom: 112 Total (i.e. Null);  109 Residual
## Null Deviance:       99.1 
## Residual Deviance: 87.92     AIC: 95.92
BRCA_clinical$TNM<-predict.glm(multifit,type = "link")

ROC曲线绘制

if (F){
require(survivalROC)
## Ge
Gene_roc<-function(genes=c("CCL16","CCL14","HBA1"),data=mydata,t=5,combine=T){
  ## 增加combine函数参数,将combine当成一个基因 
    if (combine==T){
    FML <- as.formula(paste0('OS_event~',paste(genes,collapse = '+')))
    multifit<-glm(FML,family=binomial(), data = data)
    data$combine<-predict.glm(multifit,type = "link")
    genes<-c(genes,"combine")
  }
  ## 将基因放到前面
  if(!require("tidyverse")) install.packages("tidyverse",update = F,ask = F)
  if(!require("ggsci")) install.packages("ggsci",update = F,ask = F)
  if(!require("plotROC")) install.packages("plotROC",update = F,ask = F)
  if(!require("survivalROC")) install.packages("survivalROC",update = F,ask = F)
  if(!require("survival")) install.packages("survival",update = F,ask = F)

  data<-data %>% select(genes,everything())
  ##
  sroc <- lapply(genes, function(i){
  stroc <- survivalROC(Stime =data$OS_time, status =data$OS_event, 
                       marker =data[[i]],
                       predict.time = t, method = "NNE",## KM法或NNE法
                       span = .25 * 350^(-.2))
  #span =  0.25*nobs^(-0.20))
  data.frame(TPF = stroc[["TP"]], FPF = stroc[["FP"]], 
             c = stroc[["cut.values"]], 
             gene = rep(i, length(stroc[["FP"]])))
})
sroclong <- do.call(rbind, sroc)
class(sroclong)
head(sroclong)

## 整合到数据框中
sroclong <- do.call(rbind, sroc)
class(sroclong)
head(sroclong)

## 索引找出各个AUC值命名为lab
lab<-c()
for (i in 1:length(genes)){
 stroc[[i]] <- survivalROC(Stime =data$OS_time, status =data$OS_event, 
                       marker = data[[i]],##marker
                       predict.time = t, ##时间
                       method = "NNE",## KM法或NNE法
                      span = .25 * 350^(-.2))
                       #span = 0.25*nobs^(-0.20))
 ## 索引构建lab
 lab[i]<-paste(genes[i],":",round(stroc[[i]][[6]],3),sep = " ")
 
 }

### 绘制多基因图
pROC<-ggplot(sroclong, aes(x = FPF, y = TPF,label=c,color = gene)) + 
  geom_roc(n.cuts=0,labels = FALSE, stat = "identity")  ## n.cuts设置cut点数
pROC+theme_bw()+
    xlab("1-specificity")+ylab("sensitivity")+ #坐标轴标签
    ggtitle(paste(t,"years ROC curve",sep = " "))+
    geom_abline(intercept = 0, slope = 1, linetype = "dashed")+
    theme(legend.position = c(0.8,0.3),legend.background = element_blank())+
    scale_colour_lancet(name="AUC",labels=lab)

}

## 绘制感兴趣的多个基因的五年ROC,并combine
Gene_roc(genes = c("AP","age","RI","ME","TNM"),data=BRCA_clinical,t=5,combine = T)
#ggsave("F:/Bioinfor_project/Breast/AS_research/AS/result/AP_compare_ROC.pdf",width = 5,height = 5)
#Gene_roc(genes="TNM")
}

AP_time_dependent

if(F){
require(survivalROC)
t_roc<-function(gene="CCL14",t=c(3,5,10),data=mydata){
### 时间3,5,10
sroc <- lapply(t, function(t){
  stroc <- survivalROC(Stime = data$OS_time, status = data$OS_event, 
                       marker = data[[gene]], 
                       predict.time = t, method = "NNE", ## KM法或NNE法
                       span = .25 * 350^(-.2))
  data.frame(TPF = stroc[["TP"]], FPF = stroc[["FP"]], 
             c = stroc[["cut.values"]], 
             time = rep(stroc[["predict.time"]], length(stroc[["FP"]])))
})

## 整合到数据框中
sroclong <- do.call(rbind, sroc)
class(sroclong)
head(sroclong)

## 索引找出各个AUC值命名为lab
lab<-c()
for (i in 1:length(t)){
 stroc[[i]] <- survivalROC(Stime =data$OS_time, status =data$OS_event, 
                       marker = data[[gene]] ,##marker
                       predict.time = t[i], ##时间
                       method = "NNE", ## KM法或NNE法
                       span = .25 * 350^(-.2))
 ## 索引构建lab
 lab[i]<-paste(t[i],"years :",round(stroc[[i]][[6]],3),sep = " ")
  }

##绘图
pROC<-ggplot(sroclong, aes(x = FPF, y = TPF,label=c,color = factor(time))) + 
  geom_roc(n.cuts=0,labels = FALSE, stat = "identity")  ## n.cuts设置cut点数
pROC+theme_bw()+
    xlab("1-specificity")+ylab("sensitivity")+ #坐标轴标签
    ggtitle(paste("Time dependent ROC curve of",gene,sep = " "))+
    geom_abline(intercept = 0, slope = 1, linetype = "dashed")+
    ##调整位置适应
    theme(legend.position = c(0.8,0.35),## 设置legend的位置
    legend.background = element_blank())+
    scale_colour_lancet(name="AUC",labels=lab)
}
## 默认参数绘图
t_roc(gene="AP",t=c(2:10),data=BRCA_clinical)
}
#ggsave(filename = "F:/Bioinfor_project/Breast/AS_research/AS/result/AP_time_ROC.pdf",width = 5,height = 5)