rm(list=ls())

library(ggplot2)
library(rms)
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(survival);
library(splines);
library(sas7bdat);
library(tidyr); 
library(dplyr);
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:Hmisc':
## 
##     src, summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(Gmisc)
## Loading required package: Rcpp
## Loading required package: htmlTable
library(Greg)
## Loading required package: forestplot
## Loading required package: grid
## Loading required package: checkmate
## Loading required package: abind
library(ggrcs)

library(readr)
library(haven) 
library(gtsummary)

Preparing data

setwd("~/Desktop/WorkingFromHome/X24091_1_analysis/Graph/SurvivalPlots/")
getwd()
## [1] "/Users/weiquntong/Desktop/WorkingFromHome/X24091_1_analysis/Graph/SurvivalPlots"
aim2dt<- read.sas7bdat("forplot.sas7bdat")%>% 
  rename_with(tolower) #This makes all variables lower case.
head(aim2dt)
##      ccsid visit  bmi waistcm ageatvis status  bri_new     fu_yr agecat bmi_cat
## 1 10100026    31 27.9    91.6       49      1 4.273455 0.0000000      2       3
## 2 10100026    32 27.0    97.0       49      1 4.955841 0.7255305      2       3
## 3 10100038    31 28.7    91.0       28      1 4.739535 0.0000000      1       3
## 4 10100038    32 30.3    94.3       29      1 5.189538 0.4763860      1       4
## 5 10100038    34 30.8    83.6       30      1 3.790555 1.8836413      1       4
## 6 10100038    38 34.4    91.7       32      1 5.031597 3.9315537      1       4
##   waist_cat dthevt starttime1 stoptime1 stoptimeyr   bri_new1     bri_new2
## 1         3      0          0       265  0.7255305 0.08748815 5.240674e-03
## 2         3      0        265       445  1.2183436 0.15837640 3.047604e-02
## 3         3      0          0       174  0.4763860 0.13364534 2.021626e-02
## 4         3      0        174       688  1.8836413 0.18745377 4.391869e-02
## 5         2      0        688      1436  3.9315537 0.04994062 6.812223e-09
## 6         3      0       1436      1584  4.3367556 0.16753387 3.456534e-02
##       bri_new3 bri_quintiles
## 1 0.000000e+00             2
## 2 0.000000e+00             3
## 3 0.000000e+00             2
## 4 1.095293e-07             3
## 5 0.000000e+00             1
## 6 0.000000e+00             3

Quintiles of BRI

#Determine quintile cutpoints  
(quintile_cutpoints <-aim2dt %>% 
    group_by(bri_quintiles) %>% 
    mutate(across(is.numeric, ~round(.x, 2))) %>%   # Round all variables to 2 decimal places.
    summarize(
      min = min(bri_new),
      max = max(bri_new),
      range = paste0(min,'-',max)        
    )
)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(is.numeric, ~round(.x, 2))`.
## Caused by warning:
## ! Use of bare predicate functions was deprecated in tidyselect 1.1.0.
## ℹ Please use wrap predicates in `where()` instead.
##   # Was:
##   data %>% select(is.numeric)
## 
##   # Now:
##   data %>% select(where(is.numeric))
## # A tibble: 5 × 4
##   bri_quintiles   min   max range    
##           <dbl> <dbl> <dbl> <chr>    
## 1             0  0.21  3.16 0.21-3.16
## 2             1  3.16  3.99 3.16-3.99
## 3             2  3.99  4.9  3.99-4.9 
## 4             3  4.9   6.31 4.9-6.31 
## 5             4  6.31 21.6  6.31-21.6
#This applies meaningful labels to the quintiles and makes R treat them as disjoint categorical variables rather than as a continuous covariate
aim2dt <- aim2dt %>%  
  mutate(bri_quintiles = factor(bri_quintiles, 
                                levels = c(0:4),
                                labels = c('1st (0.21-3.15)', 
                                           '2nd (3.15-3.99)', 
                                           '3rd (3.99-4.89)',
                                           '4th (4.9-6.31)',
                                           '5th (6.31-21.6)')
  ) %>%
    relevel(ref = 3)
  )

Create Restricted Cubic Splines plot

dd <- datadist(aim2dt)
options(datadist='dd')
# Fit cox model by CPH function
cox_model_rcs <- cph(Surv(starttime1, stoptime1, dthevt) ~ rcs(bri_new, 4),
                     id = ccsid, 
                     x= TRUE,
                     y = TRUE,
                     data = aim2dt
) 
ggrcs(data = aim2dt[1:100000,], 
      fit = cox_model_rcs,
      x = 'bri_new') +
  labs(title = 'The relationship between body roundness and predicted probabilty of death',
       x = 'Body Roundness'
       
  )

Cox Models by COXPH functioin

cox_model_bri <-coxph(
  Surv(starttime1, stoptime1, dthevt) ~ bri_quintiles,  data = aim2dt,id=ccsid, robust = TRUE
) 
cox_model_bri
## Call:
## coxph(formula = Surv(starttime1, stoptime1, dthevt) ~ bri_quintiles, 
##     data = aim2dt, robust = TRUE, id = ccsid)
## 
##                                  coef exp(coef) se(coef) robust se      z
## bri_quintiles1st (0.21-3.15)  0.58965   1.80335  0.08749   0.08773  6.721
## bri_quintiles2nd (3.15-3.99)  0.05470   1.05622  0.09647   0.09582  0.571
## bri_quintiles4th (4.9-6.31)  -0.15818   0.85370  0.09900   0.09759 -1.621
## bri_quintiles5th (6.31-21.6) -0.08396   0.91947  0.09611   0.09556 -0.879
##                                    p
## bri_quintiles1st (0.21-3.15) 1.8e-11
## bri_quintiles2nd (3.15-3.99)   0.568
## bri_quintiles4th (4.9-6.31)    0.105
## bri_quintiles5th (6.31-21.6)   0.380
## 
## Likelihood ratio test=92.64  on 4 df, p=< 2.2e-16
## n= 105235, unique id= 5886, number of events= 1200
# Add age at visit as covariate
cox_model_bri1 <-coxph(
  Surv(starttime1, stoptime1, dthevt) ~ bri_quintiles+ageatvis,  data = aim2dt,id=ccsid, robust = TRUE
) 
cox_model_bri1
## Call:
## coxph(formula = Surv(starttime1, stoptime1, dthevt) ~ bri_quintiles + 
##     ageatvis, data = aim2dt, robust = TRUE, id = ccsid)
## 
##                                   coef exp(coef)  se(coef) robust se      z
## bri_quintiles1st (0.21-3.15)  0.674974  1.963982  0.087931  0.088497  7.627
## bri_quintiles2nd (3.15-3.99)  0.069509  1.071981  0.096479  0.095749  0.726
## bri_quintiles4th (4.9-6.31)  -0.146851  0.863422  0.099011  0.097324 -1.509
## bri_quintiles5th (6.31-21.6) -0.019872  0.980325  0.096395  0.095919 -0.207
## ageatvis                      0.028873  1.029294  0.002962  0.002803 10.303
##                                    p
## bri_quintiles1st (0.21-3.15) 2.4e-14
## bri_quintiles2nd (3.15-3.99)   0.468
## bri_quintiles4th (4.9-6.31)    0.131
## bri_quintiles5th (6.31-21.6)   0.836
## ageatvis                     < 2e-16
## 
## Likelihood ratio test=187  on 5 df, p=< 2.2e-16
## n= 105235, unique id= 5886, number of events= 1200
(cox_model_bri_tbl <-tbl_regression(cox_model_bri, exponentiate = TRUE))
Characteristic HR 95% CI p-value
bri_quintiles


    3rd (3.99-4.89)
    1st (0.21-3.15) 1.80 1.52, 2.14 <0.001
    2nd (3.15-3.99) 1.06 0.88, 1.27 0.6
    4th (4.9-6.31) 0.85 0.71, 1.03 0.11
    5th (6.31-21.6) 0.92 0.76, 1.11 0.4
Abbreviations: CI = Confidence Interval, HR = Hazard Ratio

Proportional Hazards Tests

phtestbri <- cox.zph(cox_model_bri)
print(phtestbri)
##               chisq df    p
## bri_quintiles 0.761  4 0.94
## GLOBAL        0.761  4 0.94
phtestbri1 <- cox.zph(cox_model_bri1)
print(phtestbri1)
##               chisq df    p
## bri_quintiles 0.931  4 0.92
## ageatvis      2.504  1 0.11
## GLOBAL        3.680  5 0.60