cox_code.R

rm(list = ls())
data_for_cox <- read.csv("C:\\Users\\liyix\\OneDrive\\Desktop\\data_cox\\data_for_cox - Copy.csv",header = T,stringsAsFactors = F)
dim(data_for_cox) #[1] 139  11

## [1] 139  11

str(data_for_cox)

## 'data.frame':    139 obs. of  11 variables:
##  $ weeks                : int  6 22 9 6 1 8 2 4 4 1 ...
##  $ status               : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ age                  : int  22 29 31 53 67 33 24 41 25 21 ...
##  $ weight               : int  63 65 56 53 54 63 51 54 62 58 ...
##  $ gender               : int  1 1 1 1 1 1 0 1 1 1 ...
##  $ liver.disease.history: int  0 1 0 0 1 0 0 0 0 0 ...
##  $ diabetes.history     : int  0 0 0 1 0 0 0 0 0 0 ...
##  $ smoke                : int  0 1 1 1 1 0 0 0 0 0 ...
##  $ drink                : int  0 0 1 1 0 0 0 0 0 0 ...
##  $ Rule.resistance      : int  1 0 0 0 0 0 1 0 0 0 ...
##  $ Liver.medicine       : int  1 1 1 1 1 1 1 1 1 1 ...

data_for_cox[,1:11] <- lapply(data_for_cox[,1:11],as.numeric)
#str(data_for_cox)
#colnames(data_for_cox)
data_for_cox <- data_for_cox[,-c(10,11)]
#View(data_for_cox)
min(data_for_cox$age)

## [1] 18

max(data_for_cox$age)

## [1] 82

########################################################baseline table___1
baseline <- data_for_cox
#1 baseline
#1.1 group
baseline$Age_1[baseline$age > 55] <- ">50"
head(baseline)

##   weeks status age weight gender liver.disease.history diabetes.history smoke
## 1     6      1  22     63      1                     0                0     0
## 2    22      1  29     65      1                     1                0     1
## 3     9      1  31     56      1                     0                0     1
## 4     6      1  53     53      1                     0                1     1
## 5     1      1  67     54      1                     1                0     1
## 6     8      1  33     63      1                     0                0     0
##   drink Age_1
## 1     0  <NA>
## 2     0  <NA>
## 3     1  <NA>
## 4     1  <NA>
## 5     0   >50
## 6     0  <NA>

baseline$Age_1[baseline$age <= 55] <- "<=50"
#str(baseline)
baseline$weight_1[baseline$weight >55 ] <- ">55"
baseline$weight_1[baseline$weight <= 55] <- "<= 55"
max(baseline$weight) #[1] 76

## [1] 76

min(baseline$weight) #[1] 40

## [1] 40

max(baseline$age) #[1] 82

## [1] 82

min(baseline$age) #[1] 18

## [1] 18

#1.2 freq stat
base <- subset(baseline, select = -c(age,weight))
str(base)

## 'data.frame':    139 obs. of  9 variables:
##  $ weeks                : num  6 22 9 6 1 8 2 4 4 1 ...
##  $ status               : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ gender               : num  1 1 1 1 1 1 0 1 1 1 ...
##  $ liver.disease.history: num  0 1 0 0 1 0 0 0 0 0 ...
##  $ diabetes.history     : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ smoke                : num  0 1 1 1 1 0 0 0 0 0 ...
##  $ drink                : num  0 0 1 1 0 0 0 0 0 0 ...
##  $ Age_1                : chr  "<=50" "<=50" "<=50" "<=50" ...
##  $ weight_1             : chr  ">55" ">55" ">55" "<= 55" ...

#View(base)
freq <- lapply(base[,2:9], table)
prop <- lapply(freq[1:9], prop.table)
class(prop)

## [1] "list"

###########
#1.3 build table
char <- NULL
#i = 1
for (i in 1:8) {
  character <- c(names(freq[i]), names(freq[[i]]))
  noc <- c(NA,paste0(freq[[i]], "(", round(prop[[i]]*100,0),"%)") )
  characteristics <- data.frame("characteristics" = character, "number of cases " = noc)
  char <- rbind(char,characteristics)
}
char

##          characteristics number.of.cases.
## 1                 status             <NA>
## 2                      0         105(76%)
## 3                      1          34(24%)
## 4                 gender             <NA>
## 5                      0          38(27%)
## 6                      1         101(73%)
## 7  liver.disease.history             <NA>
## 8                      0         120(86%)
## 9                      1          19(14%)
## 10      diabetes.history             <NA>
## 11                     0         125(90%)
## 12                     1          14(10%)
## 13                 smoke             <NA>
## 14                     0          85(61%)
## 15                     1          54(39%)
## 16                 drink             <NA>
## 17                     0         118(85%)
## 18                     1          21(15%)
## 19                 Age_1             <NA>
## 20                  <=50         125(90%)
## 21                   >50          14(10%)
## 22              weight_1             <NA>
## 23                 <= 55          64(46%)
## 24                   >55          75(54%)

#1.4 output
write.csv(char,"char.csv",na= "", row.names = F)
##########################################################single factor COX hazard model___2
#2.1 convert table
dim(baseline)

## [1] 139  11

baseline <- data_for_cox
str(baseline)

## 'data.frame':    139 obs. of  9 variables:
##  $ weeks                : num  6 22 9 6 1 8 2 4 4 1 ...
##  $ status               : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ age                  : num  22 29 31 53 67 33 24 41 25 21 ...
##  $ weight               : num  63 65 56 53 54 63 51 54 62 58 ...
##  $ gender               : num  1 1 1 1 1 1 0 1 1 1 ...
##  $ liver.disease.history: num  0 1 0 0 1 0 0 0 0 0 ...
##  $ diabetes.history     : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ smoke                : num  0 1 1 1 1 0 0 0 0 0 ...
##  $ drink                : num  0 0 1 1 0 0 0 0 0 0 ...

dim(baseline)

## [1] 139   9

baseline[,1:9] <- lapply(baseline[,1:9],as.numeric)
str(baseline)

## 'data.frame':    139 obs. of  9 variables:
##  $ weeks                : num  6 22 9 6 1 8 2 4 4 1 ...
##  $ status               : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ age                  : num  22 29 31 53 67 33 24 41 25 21 ...
##  $ weight               : num  63 65 56 53 54 63 51 54 62 58 ...
##  $ gender               : num  1 1 1 1 1 1 0 1 1 1 ...
##  $ liver.disease.history: num  0 1 0 0 1 0 0 0 0 0 ...
##  $ diabetes.history     : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ smoke                : num  0 1 1 1 1 0 0 0 0 0 ...
##  $ drink                : num  0 0 1 1 0 0 0 0 0 0 ...

#2.2 default method = efron
library(survival)
basesurv <- Surv(time = baseline$weeks, event = baseline$status)
baseline$basesurv <- with(baseline,basesurv) #
#
memory.limit()

## [1] 16218

#as.formula(paste0('basesurv ~',"x")) #basesurv ~ x
unicox <- function(x){
  fml <- as.formula(paste0('basesurv ~',x))
  gcox <- coxph(fml, data = baseline)
  gsum <- summary(gcox) #
  hr <- round(gsum$coefficients[,2],2)
  pvalue <- round(gsum$coefficients[,5],2)
  ci <- paste0(round(gsum$conf.int[,3:4],2), collapse = "-")
  unicox <- data.frame("characteristics" =  x,
                       "Hazard Ratio" = hr,
                       "ci95" = ci,
                       "p value" = pvalue)
  return(unicox)
}
unicox("age")

##   characteristics Hazard.Ratio      ci95 p.value
## 1             age         0.99 0.96-1.01    0.33

#
colnames(baseline)

##  [1] "weeks"                 "status"                "age"                  
##  [4] "weight"                "gender"                "liver.disease.history"
##  [7] "diabetes.history"      "smoke"                 "drink"                
## [10] "basesurv"

varnames <- colnames(baseline)[c(3:9)]
univar <- lapply(varnames, unicox)
univar <- do.call("rbind", univar)
str(univar)

## 'data.frame':    7 obs. of  4 variables:
##  $ characteristics: chr  "age" "weight" "gender" "liver.disease.history" ...
##  $ Hazard.Ratio   : num  0.99 1.04 2.38 1.46 0.25 1.02 1.54
##  $ ci95           : chr  "0.96-1.01" "1-1.08" "0.92-6.15" "0.6-3.54" ...
##  $ p.value        : num  0.33 0.04 0.07 0.4 0.17 0.95 0.31

#View(univar)
#
as.character(univar$characteristics[univar$p.value < 0.05])

## [1] "weight"

write.csv(univar,"univar.csv",na= "", row.names = F)
##########################################################multiple factors COX hazard model___3
fml <- as.formula(paste0('basesurv ~',paste0(as.character(univar$characteristics),
                                             collapse = '+')))
multicox <- coxph(fml, data = baseline)
##test of proportional hazards
res_cox <- cox.zph(multicox)
res_cox

##                         chisq df     p
## age                    0.4843  1 0.486
## weight                 2.6412  1 0.104
## gender                 2.0114  1 0.156
## liver.disease.history  0.3084  1 0.579
## diabetes.history       0.4944  1 0.482
## smoke                  0.0624  1 0.803
## drink                  1.9767  1 0.160
## GLOBAL                12.9623  7 0.073

#3.1 分层分析
multicox_str <- coxph(basesurv ~ age + weight + gender + liver.disease.history + diabetes.history + 
                        smoke + drink, data = baseline)
#feedback check
multicox_str_1 <- cox.zph(multicox_str) #all meet
multicox_str_1

##                         chisq df     p
## age                    0.4843  1 0.486
## weight                 2.6412  1 0.104
## gender                 2.0114  1 0.156
## liver.disease.history  0.3084  1 0.579
## diabetes.history       0.4944  1 0.482
## smoke                  0.0624  1 0.803
## drink                  1.9767  1 0.160
## GLOBAL                12.9623  7 0.073

##
step(multicox, direction = "backward") #gender, Rule.resistanc,Liver.medicine

## Start:  AIC=319.06
## basesurv ~ age + weight + gender + liver.disease.history + diabetes.history + 
##     smoke + drink
## 
##                         Df    AIC
## - smoke                  1 317.55
## - age                    1 317.80
## - drink                  1 318.08
## - liver.disease.history  1 318.24
## - weight                 1 318.66
## - diabetes.history       1 319.04
## <none>                     319.06
## - gender                 1 319.29
## 
## Step:  AIC=317.55
## basesurv ~ age + weight + gender + liver.disease.history + diabetes.history + 
##     drink
## 
##                         Df    AIC
## - drink                  1 316.15
## - liver.disease.history  1 316.56
## - age                    1 316.82
## - gender                 1 317.31
## <none>                     317.55
## - weight                 1 317.63
## - diabetes.history       1 317.66
## 
## Step:  AIC=316.15
## basesurv ~ age + weight + gender + liver.disease.history + diabetes.history
## 
##                         Df    AIC
## - age                    1 315.03
## - liver.disease.history  1 315.28
## <none>                     316.15
## - gender                 1 316.15
## - diabetes.history       1 316.21
## - weight                 1 316.54
## 
## Step:  AIC=315.03
## basesurv ~ weight + gender + liver.disease.history + diabetes.history
## 
##                         Df    AIC
## - liver.disease.history  1 313.87
## - gender                 1 314.78
## <none>                     315.03
## - weight                 1 315.27
## - diabetes.history       1 316.84
## 
## Step:  AIC=313.87
## basesurv ~ weight + gender + diabetes.history
## 
##                    Df    AIC
## - gender            1 313.78
## - weight            1 313.86
## <none>                313.87
## - diabetes.history  1 315.55
## 
## Step:  AIC=313.78
## basesurv ~ weight + diabetes.history
## 
##                    Df    AIC
## <none>                313.78
## - diabetes.history  1 315.05
## - weight            1 316.38

## Call:
## coxph(formula = basesurv ~ weight + diabetes.history, data = baseline)
## 
##                      coef exp(coef) se(coef)      z      p
## weight            0.04181   1.04269  0.01957  2.137 0.0326
## diabetes.history -1.44162   0.23655  1.01586 -1.419 0.1559
## 
## Likelihood ratio test=7.63  on 2 df, p=0.02207
## n= 139, number of events= 34

#################merge data 
multisum <- summary(multicox_str)
multinames <- as.character(names(multicox_str$coefficients))
mhr <- round(multisum$coefficients[,2],2)
mpv <- round(multisum$coefficients[,5],2)
mcil <- round(multisum$conf.int[,3],2)
mciu <- round(multisum$conf.int[,4],2)
mci <- paste0(mcil,"-",mciu)
mulcox <- data.frame("characteristics" = multinames,
                     "hazard ratio_multi" = mhr,
                     "ci95_multi" = mci,
                     "p value_multi" = mpv)
##########################################################merge COX hazard model___4
#2.4 整合表格
final <- merge.data.frame(univar,mulcox,by = "characteristics", all= T, sort = T)
#2.5 输出
write.csv(final,"final.csv",na= "", row.names = F)
final

##         characteristics Hazard.Ratio      ci95 p.value hazard.ratio_multi
## 1                   age         0.99 0.96-1.01    0.33               0.99
## 2      diabetes.history         0.25 0.03-1.81    0.17               0.29
## 3                 drink         1.54 0.67-3.55    0.31               1.76
## 4                gender         2.38 0.92-6.15    0.07               2.24
## 5 liver.disease.history         1.46  0.6-3.54    0.40               1.71
## 6                 smoke         1.02 0.51-2.06    0.95               0.70
## 7                weight         1.04    1-1.08    0.04               1.03
##   ci95_multi p.value_multi
## 1  0.95-1.02          0.41
## 2  0.04-2.25          0.23
## 3  0.59-5.25          0.31
## 4  0.74-6.73          0.15
## 5  0.68-4.31          0.25
## 6  0.25-1.92          0.49
## 7  0.98-1.08          0.20

#########################################################nomogram_cox_____5
#4列线图制作
library(rms)

## Warning: package 'rms' was built under R version 4.0.4

## Loading required package: Hmisc

## Warning: package 'Hmisc' was built under R version 4.0.4

## Loading required package: lattice

## Loading required package: Formula

## Loading required package: ggplot2

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:base':
## 
##     format.pval, units

## Loading required package: SparseM

## Warning: package 'SparseM' was built under R version 4.0.4

## 
## Attaching package: 'SparseM'

## The following object is masked from 'package:base':
## 
##     backsolve

#4.1
str(data_for_cox)

## 'data.frame':    139 obs. of  9 variables:
##  $ weeks                : num  6 22 9 6 1 8 2 4 4 1 ...
##  $ status               : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ age                  : num  22 29 31 53 67 33 24 41 25 21 ...
##  $ weight               : num  63 65 56 53 54 63 51 54 62 58 ...
##  $ gender               : num  1 1 1 1 1 1 0 1 1 1 ...
##  $ liver.disease.history: num  0 1 0 0 1 0 0 0 0 0 ...
##  $ diabetes.history     : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ smoke                : num  0 1 1 1 1 0 0 0 0 0 ...
##  $ drink                : num  0 0 1 1 0 0 0 0 0 0 ...

help(lung)

## starting httpd help server ...

##  done

## 添加变量标???以便后???说明
data_for_cox$gender <-
  factor(data_for_cox$gender,
         levels = c(0,1),
         labels = c("female","male"))
data_for_cox_datalist <- datadist(data_for_cox)
options(datadist="data_for_cox_datalist")
#View(data_for_cox_datalist)
colnames(data_for_cox)

## [1] "weeks"                 "status"                "age"                  
## [4] "weight"                "gender"                "liver.disease.history"
## [7] "diabetes.history"      "smoke"                 "drink"

multicox_for_ap <- cph(Surv(weeks, status) ~ age + weight + gender + liver.disease.history + diabetes.history + 
                         smoke + drink, x=T, y=T,surv = T,data = data_for_cox)

#4.2
Surv_1 <- Survival(multicox_for_ap)
nom <- nomogram(multicox_for_ap, fun = list(function(x) Surv_1(12, x), function(x) Surv_1(24, x)),
                fun.at = seq(0.,0.95,by = 0.15),lp = F,
                funlabel = c("12-week liver injure","24-weeks liver injure"))

#tiff(filename = "myplot.tif",
#     width = 1250, height = 600, units = "px", pointsize = 12,
#     compression = "lzw",
#     bg = "white", res = NA, family = "", restoreConsole = TRUE,
#     type = c("windows", "cairo"))
plot(nom,xfrac=.15)

#dev.off()
########################################################### 构建logisitc回归模型____6
colnames(data_for_cox)

## [1] "weeks"                 "status"                "age"                  
## [4] "weight"                "gender"                "liver.disease.history"
## [7] "diabetes.history"      "smoke"                 "drink"

f1 <- lrm(status ~ age + weight + gender + liver.disease.history + diabetes.history + 
            smoke + drink, data = data_for_cox)
## 绘制logisitc回归的风险预测值的nomogram图
nom <- nomogram(f1, fun= function(x)1/(1+exp(-x)), # or fun=plogis
                lp=F, funlabel="Risk")
#tiff(filename = "myplot_logistic.tif",
#     width = 1250, height = 600, units = "px", pointsize = 12,
#     compression = "lzw",
#     bg = "white", res = NA, family = "", restoreConsole = TRUE,
#     type = c("windows", "cairo"))
plot(nom,xfrac=.15)

#dev.off()
#########################################################median survival time______7
f2 <- psm(Surv(weeks, status) ~ age + weight + gender + liver.disease.history + diabetes.history + 
            smoke + drink,data = data_for_cox, dist='lognormal')
med <- Quantile(f2) # 计算???位生???时间
surv <- Survival(f2) # 构建生???概率函数
## 绘制COX回归???位生???时间的Nomogram图
nom <- nomogram(f2, fun=function(x) med(lp=x),
                funlabel="Median Survival Time")
#tiff(filename = "Median Survival Time.tif",
#     width = 1250, height = 600, units = "px", pointsize = 12,
#     compression = "lzw",
#     bg = "white", res = NA, family = "", restoreConsole = TRUE,
#     type = c("windows", "cairo"))
plot(nom,xfrac=.15)

#dev.off()
############################################################评价COX回归的预测效果_______8

rcorrcens(Surv(weeks, status) ~ predict(f2), data = data_for_cox)

## 
## Somers' Rank Correlation for Censored Data    Response variable:Surv(weeks, status)
## 
##                 C   Dxy  aDxy    SD    Z     P   n
## predict(f2) 0.664 0.327 0.327 0.084 3.92 1e-04 139

f2 <- psm(Surv(weeks, status) ~ age + weight + gender + liver.disease.history + diabetes.history + 
            smoke + drink,data = data_for_cox,x=T, y=T, dist='lognormal')
## 构建校???曲线
#cal1 <- calibrate(f2, cmethod='KM', method="boot", u=12, m=24, B=40)
cal1 <- calibrate(f2, cmethod='KM', method="boot", u= 12, m=24, B=40)

## Warning in groupkm(psurv, y, u = u, cuts = orig.cuts): one interval had < 2
## observations

## Warning in groupkm(psurv, y, u = u, cuts = orig.cuts): one interval had < 2
## observations

## 绘制校???曲线，??rms::calibrate查看详细参数说明
par(mar=c(8,5,3,2),cex = 1.0)
plot(cal1,lwd=2,lty=1,
     errbar.col=c(rgb(0,118,192,maxColorValue=255)),
     xlim=c(0.25,0.6),ylim=c(0.15,0.70),
     xlab="Nomogram-Predicted Probability of 1-Year DFS",
     ylab="Actual 1-Year DFS (proportion)",
     col=c(rgb(192,98,83,maxColorValue=255)))

########################################################library(survminer)______9
library(survminer)

## Warning: package 'survminer' was built under R version 4.0.4

## Loading required package: ggpubr

colnames(data_for_cox)

## [1] "weeks"                 "status"                "age"                  
## [4] "weight"                "gender"                "liver.disease.history"
## [7] "diabetes.history"      "smoke"                 "drink"

sfit <- survfit(Surv(weeks, status)~gender, data=data_for_cox)
plot(sfit)
ggsurvplot(sfit)

ggsurvplot(sfit, conf.int=F, pval=TRUE, risk.table=F,
           legend.labs=c("Male", "Female"), legend.title="Gender",
           palette=c("dodgerblue2", "orchid2"),
           title="Kaplan‐Meier Curve for Liver injure",
           risk.table.height=.15)

#############
colnames(data_for_cox)

## [1] "weeks"                 "status"                "age"                  
## [4] "weight"                "gender"                "liver.disease.history"
## [7] "diabetes.history"      "smoke"                 "drink"

p <- ggsurvplot(survfit(Surv(weeks, status)~gender,conf.int=T, data=data_for_cox), 
           pval=TRUE,title = "Rule.resistance") 
p$plot + theme(plot.title = element_text(hjust = 0.5, face = "plain")) + xlab("weeks")

ggsave(paste0(Sys.Date(),"-gender",".tiff"), plot = last_plot(), device = NULL, path = "D:\\shuai yi_20180813\\cox\\20181019 newest version_REMOVE LIVER MEDICATION\\",
       scale = 1, width = 10, height = 10, units ="cm",dpi = 300, limitsize = TRUE)

## Warning in grDevices::dev.off(): unable to open TIFF file 'D:\shuai
## yi_20180813\cox\20181019 newest version_REMOVE LIVER MEDICATION\/2021-04-05-
## gender.tiff'