library(tidymodels)
library(tidyverse)
library(ggpubr)
library(ggcorrplot)
library(GGally)
library(mice)
library(survival)
library(survminer)

# Make ggplot2's black-and-white theme the default for all plots in this file.
theme_set(theme_bw())
# Register tidymodels' preferred choices for common function-name conflicts
# between the attached packages.
tidymodels_prefer()

Cirrhosis

Context: Cirrhosis is a late stage of scarring (fibrosis) of the liver caused by many forms of liver disease and conditions, such as hepatitis and chronic alcoholism. The following data contain the information collected from the Mayo Clinic trial in primary biliary cirrhosis (PBC) of the liver conducted between 1974 and 1984. A description of the clinical background for the trial and the covariates recorded here is in Chapter 0, especially Section 0.2 of Fleming and Harrington, Counting Processes and Survival Analysis, Wiley, 1991. A more extended discussion can be found in Dickson, et al., Hepatology 10:1-7 (1989) and in Markus, et al., N Eng J of Med 320:1709-13 (1989).

A total of 424 PBC patients, referred to Mayo Clinic during that ten-year interval, met eligibility criteria for the randomized placebo-controlled trial of the drug D-penicillamine. The first 312 cases in the dataset participated in the randomized trial and contain largely complete data. The additional 112 cases did not participate in the clinical trial but consented to have basic measurements recorded and to be followed for survival. Six of those cases were lost to follow-up shortly after diagnosis, so the data here are on an additional 106 cases as well as the 312 randomized participants.

Attribute Information: 1) ID: unique identifier 2) N_Days: number of days between registration and the earlier of death, transplantation, or study analysis time in July 1986 3) Status: status of the patient C (censored), CL (censored due to liver tx), or D (death) 4) Drug: type of drug D-penicillamine or placebo 5) Age: age in [days] 6) Sex: M (male) or F (female) 7) Ascites: presence of ascites N (No) or Y (Yes) 8) Hepatomegaly: presence of hepatomegaly N (No) or Y (Yes) 9) Spiders: presence of spiders N (No) or Y (Yes) 10) Edema: presence of edema N (no edema and no diuretic therapy for edema), S (edema present without diuretics, or edema resolved by diuretics), or Y (edema despite diuretic therapy) 11) Bilirubin: serum bilirubin in [mg/dl] 12) Cholesterol: serum cholesterol in [mg/dl] 13) Albumin: albumin in [gm/dl] 14) Copper: urine copper in [ug/day] 15) Alk_Phos: alkaline phosphatase in [U/liter] 16) SGOT: SGOT in [U/ml] 17) Triglycerides: triglycerides in [mg/dl] (note: the column is spelled `Tryglicerides` in the data file) 18) Platelets: platelets per cubic [ml/1000] 19) Prothrombin: prothrombin time in seconds [s] 20) Stage: histologic stage of disease (1, 2, 3, or 4)

# Read the raw data set from the working directory.
cirrhosis <- read_csv("cirrhosis.csv")

# Column-by-column overview: type and leading values of every variable.
cirrhosis %>%
  glimpse()
Rows: 418
Columns: 20
$ ID            <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1~
$ N_Days        <dbl> 400, 4500, 1012, 1925, 1504, 2503, 1832, 2466, 2400, 51,~
$ Status        <chr> "D", "C", "D", "D", "CL", "D", "C", "D", "D", "D", "D", ~
$ Drug          <chr> "D-penicillamine", "D-penicillamine", "D-penicillamine",~
$ Age           <dbl> 21464, 20617, 25594, 19994, 13918, 24201, 20284, 19379, ~
$ Sex           <chr> "F", "F", "M", "F", "F", "F", "F", "F", "F", "F", "F", "~
$ Ascites       <chr> "Y", "N", "N", "N", "N", "N", "N", "N", "N", "Y", "N", "~
$ Hepatomegaly  <chr> "Y", "Y", "N", "Y", "Y", "Y", "Y", "N", "N", "N", "Y", "~
$ Spiders       <chr> "Y", "Y", "N", "Y", "Y", "N", "N", "N", "Y", "Y", "Y", "~
$ Edema         <chr> "Y", "N", "S", "S", "N", "N", "N", "N", "N", "Y", "N", "~
$ Bilirubin     <dbl> 14.5, 1.1, 1.4, 1.8, 3.4, 0.8, 1.0, 0.3, 3.2, 12.6, 1.4,~
$ Cholesterol   <dbl> 261, 302, 176, 244, 279, 248, 322, 280, 562, 200, 259, 2~
$ Albumin       <dbl> 2.60, 4.14, 3.48, 2.54, 3.53, 3.98, 4.09, 4.00, 3.08, 2.~
$ Copper        <dbl> 156, 54, 210, 64, 143, 50, 52, 52, 79, 140, 46, 94, 40, ~
$ Alk_Phos      <dbl> 1718.0, 7394.8, 516.0, 6121.8, 671.0, 944.0, 824.0, 4651~
$ SGOT          <dbl> 137.95, 113.52, 96.10, 60.63, 113.15, 93.00, 60.45, 28.3~
$ Tryglicerides <dbl> 172, 88, 55, 92, 72, 63, 213, 189, 88, 143, 79, 95, 130,~
$ Platelets     <dbl> 190, 221, 151, 183, 136, NA, 204, 373, 251, 302, 258, 71~
$ Prothrombin   <dbl> 12.2, 10.6, 12.0, 10.3, 10.9, 11.0, 9.7, 11.0, 11.0, 11.~
$ Stage         <dbl> 4, 3, 4, 4, 3, 3, 3, 3, 2, 4, 4, 4, 3, 4, 3, 3, 4, 4, 3,~
# Peek at three randomly chosen rows.
# slice_sample() is the current dplyr verb; sample_n() is superseded.
slice_sample(cirrhosis, n = 3)
# A tibble: 3 x 20
     ID N_Days Status Drug          Age Sex   Ascites Hepatomegaly Spiders Edema
  <dbl>  <dbl> <chr>  <chr>       <dbl> <chr> <chr>   <chr>        <chr>   <chr>
1   380   1725 CL     <NA>        12053 F     <NA>    <NA>         <NA>    N    
2   273   1558 C      Placebo     17320 F     N       N            Y       N    
3   252   1770 C      D-penicill~ 25006 F     N       Y            Y       N    
# ... with 10 more variables: Bilirubin <dbl>, Cholesterol <dbl>,
#   Albumin <dbl>, Copper <dbl>, Alk_Phos <dbl>, SGOT <dbl>,
#   Tryglicerides <dbl>, Platelets <dbl>, Prothrombin <dbl>, Stage <dbl>
# Per-column summary statistics, including NA counts for numeric columns.
cirrhosis %>%
  summary()
       ID            N_Days        Status              Drug          
 Min.   :  1.0   Min.   :  41   Length:418         Length:418        
 1st Qu.:105.2   1st Qu.:1093   Class :character   Class :character  
 Median :209.5   Median :1730   Mode  :character   Mode  :character  
 Mean   :209.5   Mean   :1918                                        
 3rd Qu.:313.8   3rd Qu.:2614                                        
 Max.   :418.0   Max.   :4795                                        
                                                                     
      Age            Sex              Ascites          Hepatomegaly      
 Min.   : 9598   Length:418         Length:418         Length:418        
 1st Qu.:15644   Class :character   Class :character   Class :character  
 Median :18628   Mode  :character   Mode  :character   Mode  :character  
 Mean   :18533                                                           
 3rd Qu.:21273                                                           
 Max.   :28650                                                           
                                                                         
   Spiders             Edema             Bilirubin       Cholesterol    
 Length:418         Length:418         Min.   : 0.300   Min.   : 120.0  
 Class :character   Class :character   1st Qu.: 0.800   1st Qu.: 249.5  
 Mode  :character   Mode  :character   Median : 1.400   Median : 309.5  
                                       Mean   : 3.221   Mean   : 369.5  
                                       3rd Qu.: 3.400   3rd Qu.: 400.0  
                                       Max.   :28.000   Max.   :1775.0  
                                                        NA's   :134     
    Albumin          Copper          Alk_Phos            SGOT       
 Min.   :1.960   Min.   :  4.00   Min.   :  289.0   Min.   : 26.35  
 1st Qu.:3.243   1st Qu.: 41.25   1st Qu.:  871.5   1st Qu.: 80.60  
 Median :3.530   Median : 73.00   Median : 1259.0   Median :114.70  
 Mean   :3.497   Mean   : 97.65   Mean   : 1982.7   Mean   :122.56  
 3rd Qu.:3.770   3rd Qu.:123.00   3rd Qu.: 1980.0   3rd Qu.:151.90  
 Max.   :4.640   Max.   :588.00   Max.   :13862.4   Max.   :457.25  
                 NA's   :108      NA's   :106       NA's   :106     
 Tryglicerides      Platelets      Prothrombin        Stage      
 Min.   : 33.00   Min.   : 62.0   Min.   : 9.00   Min.   :1.000  
 1st Qu.: 84.25   1st Qu.:188.5   1st Qu.:10.00   1st Qu.:2.000  
 Median :108.00   Median :251.0   Median :10.60   Median :3.000  
 Mean   :124.70   Mean   :257.0   Mean   :10.73   Mean   :3.024  
 3rd Qu.:151.00   3rd Qu.:318.0   3rd Qu.:11.10   3rd Qu.:4.000  
 Max.   :598.00   Max.   :721.0   Max.   :18.00   Max.   :4.000  
 NA's   :136      NA's   :11      NA's   :2       NA's   :6      
# Recode two columns in a single step:
#   * Age   - divided by 365 and rounded to one decimal (days -> years)
#   * Stage - converted from a number to a factor
cirrhosis <- mutate(
  cirrhosis,
  Age = round(Age / 365, 1),
  Stage = as_factor(Stage)
)

The summary above shows missing values (NAs) in several columns, so we inspect the missing-data pattern next.

# Tabulate which combinations of columns are missing together.
cirrhosis %>%
  md.pattern()

    ID N_Days Status Age Sex Edema Bilirubin Albumin Prothrombin Stage
276  1      1      1   1   1     1         1       1           1     1
2    1      1      1   1   1     1         1       1           1     1
28   1      1      1   1   1     1         1       1           1     1
2    1      1      1   1   1     1         1       1           1     1
91   1      1      1   1   1     1         1       1           1     1
4    1      1      1   1   1     1         1       1           1     1
7    1      1      1   1   1     1         1       1           1     1
6    1      1      1   1   1     1         1       1           1     0
2    1      1      1   1   1     1         1       1           0     1
     0      0      0   0   0     0         0       0           2     6
    Platelets Drug Ascites Hepatomegaly Spiders Alk_Phos SGOT Copper
276         1    1       1            1       1        1    1      1
2           1    1       1            1       1        1    1      1
28          1    1       1            1       1        1    1      1
2           1    1       1            1       1        1    1      0
91          1    0       0            0       0        0    0      0
4           0    1       1            1       1        1    1      1
7           0    0       0            0       0        0    0      0
6           1    0       0            0       0        0    0      0
2           1    0       0            0       0        0    0      0
           11  106     106          106     106      106  106    108
    Cholesterol Tryglicerides     
276           1             1    0
2             1             0    1
28            0             0    2
2             1             1    1
91            0             0    9
4             1             1    1
7             0             0   10
6             0             0   10
2             0             0   10
            134           136 1033
# Keep only the rows that have a recorded treatment (non-missing Drug).
cirrhosis <- filter(cirrhosis, !is.na(Drug))

# Confirm the remaining number of rows and columns.
cirrhosis %>%
  dim()
[1] 312  20
# Impute the remaining missing numeric values with random-forest multiple
# imputation, producing m = 5 completed data sets.
#
# The imputation is stochastic, so a fixed seed is supplied to make the
# imputed values (and everything derived from them) reproducible.
#
# NOTE(review): the character columns are passed in as-is; the logged events
# printed with the mids object report them as "constant", so they do not act
# as predictors. Convert them to factors first if they should.
cirrhosis_mice <- mice(cirrhosis, m = 5, method = "rf", seed = 1234)

 iter imp variable
  1   1  Cholesterol  Copper  Tryglicerides  Platelets
  1   2  Cholesterol  Copper  Tryglicerides  Platelets
  1   3  Cholesterol  Copper  Tryglicerides  Platelets
  1   4  Cholesterol  Copper  Tryglicerides  Platelets
  1   5  Cholesterol  Copper  Tryglicerides  Platelets
  2   1  Cholesterol  Copper  Tryglicerides  Platelets
  2   2  Cholesterol  Copper  Tryglicerides  Platelets
  2   3  Cholesterol  Copper  Tryglicerides  Platelets
  2   4  Cholesterol  Copper  Tryglicerides  Platelets
  2   5  Cholesterol  Copper  Tryglicerides  Platelets
  3   1  Cholesterol  Copper  Tryglicerides  Platelets
  3   2  Cholesterol  Copper  Tryglicerides  Platelets
  3   3  Cholesterol  Copper  Tryglicerides  Platelets
  3   4  Cholesterol  Copper  Tryglicerides  Platelets
  3   5  Cholesterol  Copper  Tryglicerides  Platelets
  4   1  Cholesterol  Copper  Tryglicerides  Platelets
  4   2  Cholesterol  Copper  Tryglicerides  Platelets
  4   3  Cholesterol  Copper  Tryglicerides  Platelets
  4   4  Cholesterol  Copper  Tryglicerides  Platelets
  4   5  Cholesterol  Copper  Tryglicerides  Platelets
  5   1  Cholesterol  Copper  Tryglicerides  Platelets
  5   2  Cholesterol  Copper  Tryglicerides  Platelets
  5   3  Cholesterol  Copper  Tryglicerides  Platelets
  5   4  Cholesterol  Copper  Tryglicerides  Platelets
  5   5  Cholesterol  Copper  Tryglicerides  Platelets
# Show the imputation set-up: methods, predictor matrix and logged events.
print(cirrhosis_mice)
Class: mids
Number of multiple imputations:  5 
Imputation methods:
           ID        N_Days        Status          Drug           Age 
           ""            ""            ""            ""            "" 
          Sex       Ascites  Hepatomegaly       Spiders         Edema 
           ""            ""            ""            ""            "" 
    Bilirubin   Cholesterol       Albumin        Copper      Alk_Phos 
           ""          "rf"            ""          "rf"            "" 
         SGOT Tryglicerides     Platelets   Prothrombin         Stage 
           ""          "rf"          "rf"            ""            "" 
PredictorMatrix:
       ID N_Days Status Drug Age Sex Ascites Hepatomegaly Spiders Edema
ID      0      1      0    0   1   0       0            0       0     0
N_Days  1      0      0    0   1   0       0            0       0     0
Status  1      1      0    0   1   0       0            0       0     0
Drug    1      1      0    0   1   0       0            0       0     0
Age     1      1      0    0   0   0       0            0       0     0
Sex     1      1      0    0   1   0       0            0       0     0
       Bilirubin Cholesterol Albumin Copper Alk_Phos SGOT Tryglicerides
ID             1           1       1      1        1    1             1
N_Days         1           1       1      1        1    1             1
Status         1           1       1      1        1    1             1
Drug           1           1       1      1        1    1             1
Age            1           1       1      1        1    1             1
Sex            1           1       1      1        1    1             1
       Platelets Prothrombin Stage
ID             1           1     1
N_Days         1           1     1
Status         1           1     1
Drug           1           1     1
Age            1           1     1
Sex            1           1     1
Number of logged events:  7 
  it im dep     meth          out
1  0  0     constant       Status
2  0  0     constant         Drug
3  0  0     constant          Sex
4  0  0     constant      Ascites
5  0  0     constant Hepatomegaly
6  0  0     constant      Spiders
# Extract the third of the five imputed data sets for the rest of the
# analysis. The call is namespace-qualified because tidyr (attached through
# tidyverse) also exports a complete() with a different meaning.
# NOTE(review): only one imputed data set is used downstream, so the
# between-imputation uncertainty is not propagated into later estimates.
cirrhosis_imp <- mice::complete(cirrhosis_mice, action = 3)

# Check that no NA counts remain in the imputed columns.
summary(cirrhosis_imp)
       ID             N_Days        Status              Drug          
 Min.   :  1.00   Min.   :  41   Length:312         Length:312        
 1st Qu.: 78.75   1st Qu.:1191   Class :character   Class :character  
 Median :156.50   Median :1840   Mode  :character   Mode  :character  
 Mean   :156.50   Mean   :2006                                        
 3rd Qu.:234.25   3rd Qu.:2697                                        
 Max.   :312.00   Max.   :4556                                        
      Age            Sex              Ascites          Hepatomegaly      
 Min.   :26.30   Length:312         Length:312         Length:312        
 1st Qu.:42.30   Class :character   Class :character   Class :character  
 Median :49.85   Mode  :character   Mode  :character   Mode  :character  
 Mean   :50.05                                                           
 3rd Qu.:56.73                                                           
 Max.   :78.50                                                           
   Spiders             Edema             Bilirubin       Cholesterol    
 Length:312         Length:312         Min.   : 0.300   Min.   : 120.0  
 Class :character   Class :character   1st Qu.: 0.800   1st Qu.: 248.0  
 Mode  :character   Mode  :character   Median : 1.350   Median : 309.5  
                                       Mean   : 3.256   Mean   : 367.2  
                                       3rd Qu.: 3.425   3rd Qu.: 399.2  
                                       Max.   :28.000   Max.   :1775.0  
    Albumin         Copper          Alk_Phos            SGOT       
 Min.   :1.96   Min.   :  4.00   Min.   :  289.0   Min.   : 26.35  
 1st Qu.:3.31   1st Qu.: 41.75   1st Qu.:  871.5   1st Qu.: 80.60  
 Median :3.55   Median : 73.00   Median : 1259.0   Median :114.70  
 Mean   :3.52   Mean   : 97.59   Mean   : 1982.7   Mean   :122.56  
 3rd Qu.:3.80   3rd Qu.:123.00   3rd Qu.: 1980.0   3rd Qu.:151.90  
 Max.   :4.64   Max.   :588.00   Max.   :13862.4   Max.   :457.25  
 Tryglicerides     Platelets      Prothrombin    Stage  
 Min.   : 33.0   Min.   : 62.0   Min.   : 9.00   1: 16  
 1st Qu.: 85.0   1st Qu.:200.0   1st Qu.:10.00   2: 67  
 Median :108.0   Median :258.5   Median :10.60   3:120  
 Mean   :124.8   Mean   :262.5   Mean   :10.73   4:109  
 3rd Qu.:151.0   3rd Qu.:324.0   3rd Qu.:11.10          
 Max.   :598.0   Max.   :563.0   Max.   :17.10          
# Scatter-plot matrix of the clinical measurements, filled by treatment arm.
# ID is left out simply by not listing it in the selection.
cirrhosis_imp %>%
  select(
    Age, Bilirubin, Cholesterol, Albumin, Copper, Alk_Phos, SGOT,
    Tryglicerides, Platelets, Prothrombin, Stage, Drug, Sex
  ) %>%
  ggpairs(mapping = aes(fill = Drug))

# Correlation heat map (upper triangle, hierarchically ordered, labelled)
# of every numeric column.
# Predicate functions must be wrapped in where() inside select(); the bare
# form is deprecated. TRUE is spelled out because T can be reassigned.
# NOTE(review): ID and N_Days are numeric and therefore included; drop them
# first if only clinical measurements are of interest.
# NOTE(review): sig.level appears to matter only when p.mat is supplied,
# which it is not here - TODO confirm and remove if unused.
cirrhosis_imp %>%
  select(where(is.numeric)) %>%
  cor() %>%
  ggcorrplot(
    type = "upper",
    hc.order = TRUE,
    lab = TRUE,
    sig.level = .5
  )

# Number of patients in every Sex x Stage combination.
# .groups = "drop" returns an ungrouped table; without it the result stays
# grouped by Sex (with a message), which silently affects later verbs.
df <- cirrhosis_imp %>%
  group_by(Sex, Stage) %>%
  summarise(counts = n(), .groups = "drop")

# Dodged bar chart of patient counts per stage, split by sex, with the
# count printed above each bar.
ggplot(data = df, mapping = aes(x = Stage, y = counts)) +
  geom_col(aes(fill = Sex), position = "dodge") +
  geom_text(
    aes(label = counts, group = Sex),
    position = position_dodge(width = 0.9),
    vjust = -0.3,
    size = 3.5
  ) +
  scale_fill_manual(values = c("#EEAB5F", "#EE5F93"))

# Cholesterol against follow-up time, coloured by sex.
# Every row is a different patient, so the observations are drawn as points;
# joining them with a line would suggest a trajectory that does not exist.
ggplot(cirrhosis_imp, aes(N_Days, Cholesterol, color = Sex)) +
  geom_point()

# Scatter-plot matrix of the clinical measurements, filled by treatment arm.
# ID is left out simply by not listing it in the selection.
# NOTE(review): this chunk repeats an identical pairs plot earlier in the
# file; consider keeping only one of them.
cirrhosis_imp %>%
  select(
    Age, Bilirubin, Cholesterol, Albumin, Copper, Alk_Phos, SGOT,
    Tryglicerides, Platelets, Prothrombin, Stage, Drug, Sex
  ) %>%
  ggpairs(mapping = aes(fill = Drug))

# Correlation heat map (upper triangle, hierarchically ordered, labelled)
# of every numeric column.
# Predicate functions must be wrapped in where() inside select(); the bare
# form is deprecated. TRUE is spelled out because T can be reassigned.
# NOTE(review): this chunk repeats an identical correlation plot earlier in
# the file; consider keeping only one of them.
cirrhosis_imp %>%
  select(where(is.numeric)) %>%
  cor() %>%
  ggcorrplot(
    type = "upper",
    hc.order = TRUE,
    lab = TRUE,
    sig.level = .5
  )

# Number of patients in every Sex x Stage combination.
# .groups = "drop" returns an ungrouped table; without it the result stays
# grouped by Sex (with a message), which silently affects later verbs.
# NOTE(review): `df` is already computed identically earlier in the file.
df <- cirrhosis_imp %>%
  group_by(Sex, Stage) %>%
  summarise(counts = n(), .groups = "drop")

# Dodged bar chart of patient counts per stage, split by sex, with the
# count printed above each bar.
# NOTE(review): this chunk repeats an identical bar chart earlier in the file.
ggplot(data = df, mapping = aes(x = Stage, y = counts)) +
  geom_col(aes(fill = Sex), position = "dodge") +
  geom_text(
    aes(label = counts, group = Sex),
    position = position_dodge(width = 0.9),
    vjust = -0.3,
    size = 3.5
  ) +
  scale_fill_manual(values = c("#EEAB5F", "#EE5F93"))

# Cholesterol against follow-up time, coloured by sex.
# Every row is a different patient, so the observations are drawn as points;
# joining them with a line would suggest a trajectory that does not exist.
# NOTE(review): this chunk repeats the same plot from earlier in the file.
ggplot(cirrhosis_imp, aes(N_Days, Cholesterol, color = Sex)) +
  geom_point()

# How many patients fall into each outcome status (C, CL, D).
count(cirrhosis_imp, Status)
  Status   n
1      C 168
2     CL  19
3      D 125
# Build the survival data set: Status becomes a numeric event indicator,
# 1 for death ("D") and 0 for every other status.
cirrhosis_surv <- mutate(
  cirrhosis_imp,
  Status = if_else(Status == "D", 1, 0)
)

# Survival response (follow-up time, event indicator), reused by the
# model fits that follow.
surv_obj <- Surv(
  time = cirrhosis_surv$N_Days,
  event = cirrhosis_surv$Status
)

# Kaplan-Meier estimate for the whole cohort (no stratification).
fit_mono <- survfit(formula = surv_obj ~ 1, data = cirrhosis_surv)
print(fit_mono)
Call: survfit(formula = surv_obj ~ 1, data = cirrhosis_surv)

      n  events  median 0.95LCL 0.95UCL 
    312     125    3395    3086    3853 
# Plot the overall Kaplan-Meier curve.
ggsurvplot(fit = fit_mono, color = "#2E9FDF")

# Kaplan-Meier estimate stratified by treatment (Drug).
fit_comp_drug <- survfit(formula = surv_obj ~ Drug, data = cirrhosis_surv)
print(fit_comp_drug)
Call: survfit(formula = surv_obj ~ Drug, data = cirrhosis_surv)

                       n events median 0.95LCL 0.95UCL
Drug=D-penicillamine 158     65   3282    2583      NA
Drug=Placebo         154     60   3428    3090      NA
# Kaplan-Meier curves per treatment with confidence bands, p-value,
# at-risk table and median-survival guide lines.
# The data set is passed explicitly so survminer does not have to recover it
# from the fit's call; TRUE is spelled out because T can be reassigned.
ggsurvplot(
  fit_comp_drug,
  data = cirrhosis_surv,
  conf.int = TRUE,
  pval = TRUE,
  risk.table = TRUE,
  surv.median.line = "hv",
  palette = c("#EE5F93", "#2E9FDF")
)

# Test for a difference between the two treatment survival curves.
survdiff(surv_obj ~ Drug, data = cirrhosis_surv)
Call:
survdiff(formula = surv_obj ~ Drug, data = cirrhosis_surv)

                       N Observed Expected (O-E)^2/E (O-E)^2/V
Drug=D-penicillamine 158       65     63.2    0.0502     0.102
Drug=Placebo         154       60     61.8    0.0513     0.102

 Chisq= 0.1  on 1 degrees of freedom, p= 0.7 
# Cox proportional-hazards model on all available covariates.
# The response is written with the data's own column names so that `.`
# expands to the remaining columns only. With the external `surv_obj` on the
# left-hand side, `.` also pulled in N_Days and Status - the outcome itself -
# as predictors, which made the fit meaningless. ID is a row identifier and
# is removed explicitly.
coxph(Surv(N_Days, Status) ~ . - ID, data = cirrhosis_surv)
Call:
coxph(formula = surv_obj ~ ., data = cirrhosis_surv)

                    coef  exp(coef)   se(coef)      z        p
ID             2.856e-03  1.003e+00  3.656e-03  0.781   0.4346
N_Days        -3.321e-01  7.174e-01  7.220e-02 -4.599 4.24e-06
Status         1.073e+01  4.562e+04  1.703e+01  0.630   0.5286
DrugPlacebo    5.334e-01  1.705e+00  6.491e-01  0.822   0.4113
Age           -4.843e-02  9.527e-01  3.611e-02 -1.341   0.1799
SexM           3.133e-01  1.368e+00  8.565e-01  0.366   0.7145
AscitesY       1.611e-03  1.002e+00  7.432e-01  0.002   0.9983
HepatomegalyY  5.921e-01  1.808e+00  8.087e-01  0.732   0.4640
SpidersY       2.999e-01  1.350e+00  6.013e-01  0.499   0.6180
EdemaS        -2.054e+00  1.282e-01  8.945e-01 -2.296   0.0217
EdemaY         5.863e-01  1.797e+00  8.248e-01  0.711   0.4772
Bilirubin     -2.604e-02  9.743e-01  5.660e-02 -0.460   0.6455
Cholesterol   -6.373e-04  9.994e-01  1.277e-03 -0.499   0.6176
Albumin        6.676e-01  1.949e+00  7.314e-01  0.913   0.3614
Copper         3.523e-04  1.000e+00  2.441e-03  0.144   0.8852
Alk_Phos      -2.209e-04  9.998e-01  2.076e-04 -1.064   0.2874
SGOT           9.046e-04  1.001e+00  5.622e-03  0.161   0.8722
Tryglicerides  5.399e-04  1.001e+00  2.784e-03  0.194   0.8462
Platelets     -2.752e-04  9.997e-01  3.860e-03 -0.071   0.9432
Prothrombin    2.501e-01  1.284e+00  3.804e-01  0.657   0.5110
Stage2        -5.204e+00  5.492e-03  1.208e+00 -4.310 1.63e-05
Stage3        -6.071e+00  2.309e-03  7.218e-01 -8.411  < 2e-16
Stage4        -5.897e+00  2.747e-03  6.285e-01 -9.382  < 2e-16

Likelihood ratio test=1266  on 23 df, p=< 2.2e-16
n= 312, number of events= 125 
# Reduced Cox model with treatment, histologic stage and edema as the only
# covariates, followed by its full summary (hazard ratios, CIs, tests).
cox_model <- coxph(
  formula = surv_obj ~ Drug + Stage + Edema,
  data = cirrhosis_surv
)
summary(cox_model)
Call:
coxph(formula = surv_obj ~ Drug + Stage + Edema, data = cirrhosis_surv)

  n= 312, number of events= 125 

               coef exp(coef) se(coef)      z Pr(>|z|)    
DrugPlacebo -0.1393    0.8700   0.1833 -0.760  0.44732    
Stage2       1.6081    4.9935   1.0317  1.559  0.11906    
Stage3       2.0246    7.5734   1.0143  1.996  0.04592 *  
Stage4       2.8305   16.9531   1.0126  2.795  0.00519 ** 
EdemaS       0.6093    1.8392   0.2710  2.248  0.02455 *  
EdemaY       2.0281    7.5996   0.2707  7.492 6.78e-14 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

            exp(coef) exp(-coef) lower .95 upper .95
DrugPlacebo     0.870    1.14942    0.6075     1.246
Stage2          4.993    0.20026    0.6610    37.721
Stage3          7.573    0.13204    1.0374    55.288
Stage4         16.953    0.05899    2.3299   123.359
EdemaS          1.839    0.54371    1.0813     3.128
EdemaY          7.600    0.13159    4.4707    12.918

Concordance= 0.755  (se = 0.022 )
Likelihood ratio test= 94.65  on 6 df,   p=<2e-16
Wald test            = 108.3  on 6 df,   p=<2e-16
Score (logrank) test = 161.8  on 6 df,   p=<2e-16
# Kaplan-Meier estimate stratified by sex.
fit_comp_sex <- survfit(formula = surv_obj ~ Sex, data = cirrhosis_surv)
print(fit_comp_sex)
Call: survfit(formula = surv_obj ~ Sex, data = cirrhosis_surv)

        n events median 0.95LCL 0.95UCL
Sex=F 276    103   3428    3170      NA
Sex=M  36     22   2386    1297      NA
# Kaplan-Meier curves per sex with confidence bands, p-value, at-risk table
# and median-survival guide lines.
# The data set is passed explicitly so survminer does not have to recover it
# from the fit's call; TRUE is spelled out because T can be reassigned.
ggsurvplot(
  fit_comp_sex,
  data = cirrhosis_surv,
  conf.int = TRUE,
  pval = TRUE,
  risk.table = TRUE,
  surv.median.line = "hv",
  palette = c("#EE5F93", "#2E9FDF")
)

# Test for a difference between the female and male survival curves.
survdiff(surv_obj ~ Sex, data = cirrhosis_surv)
Call:
survdiff(formula = surv_obj ~ Sex, data = cirrhosis_surv)

        N Observed Expected (O-E)^2/E (O-E)^2/V
Sex=F 276      103    110.4     0.494      4.27
Sex=M  36       22     14.6     3.728      4.27

 Chisq= 4.3  on 1 degrees of freedom, p= 0.04 
# Cox proportional-hazards model on all available covariates.
# The response is written with the data's own column names so that `.`
# expands to the remaining columns only. With the external `surv_obj` on the
# left-hand side, `.` also pulled in N_Days and Status - the outcome itself -
# as predictors, which made the fit meaningless. ID is a row identifier and
# is removed explicitly.
# NOTE(review): the same model is fitted earlier in the file; consider
# keeping only one of the two calls.
coxph(Surv(N_Days, Status) ~ . - ID, data = cirrhosis_surv)
Call:
coxph(formula = surv_obj ~ ., data = cirrhosis_surv)

                    coef  exp(coef)   se(coef)      z        p
ID             2.856e-03  1.003e+00  3.656e-03  0.781   0.4346
N_Days        -3.321e-01  7.174e-01  7.220e-02 -4.599 4.24e-06
Status         1.073e+01  4.562e+04  1.703e+01  0.630   0.5286
DrugPlacebo    5.334e-01  1.705e+00  6.491e-01  0.822   0.4113
Age           -4.843e-02  9.527e-01  3.611e-02 -1.341   0.1799
SexM           3.133e-01  1.368e+00  8.565e-01  0.366   0.7145
AscitesY       1.611e-03  1.002e+00  7.432e-01  0.002   0.9983
HepatomegalyY  5.921e-01  1.808e+00  8.087e-01  0.732   0.4640
SpidersY       2.999e-01  1.350e+00  6.013e-01  0.499   0.6180
EdemaS        -2.054e+00  1.282e-01  8.945e-01 -2.296   0.0217
EdemaY         5.863e-01  1.797e+00  8.248e-01  0.711   0.4772
Bilirubin     -2.604e-02  9.743e-01  5.660e-02 -0.460   0.6455
Cholesterol   -6.373e-04  9.994e-01  1.277e-03 -0.499   0.6176
Albumin        6.676e-01  1.949e+00  7.314e-01  0.913   0.3614
Copper         3.523e-04  1.000e+00  2.441e-03  0.144   0.8852
Alk_Phos      -2.209e-04  9.998e-01  2.076e-04 -1.064   0.2874
SGOT           9.046e-04  1.001e+00  5.622e-03  0.161   0.8722
Tryglicerides  5.399e-04  1.001e+00  2.784e-03  0.194   0.8462
Platelets     -2.752e-04  9.997e-01  3.860e-03 -0.071   0.9432
Prothrombin    2.501e-01  1.284e+00  3.804e-01  0.657   0.5110
Stage2        -5.204e+00  5.492e-03  1.208e+00 -4.310 1.63e-05
Stage3        -6.071e+00  2.309e-03  7.218e-01 -8.411  < 2e-16
Stage4        -5.897e+00  2.747e-03  6.285e-01 -9.382  < 2e-16

Likelihood ratio test=1266  on 23 df, p=< 2.2e-16
n= 312, number of events= 125 
# Reduced Cox model with treatment, histologic stage and edema as the only
# covariates, followed by its full summary (hazard ratios, CIs, tests).
# NOTE(review): `cox_model` is fitted identically earlier in the file.
cox_model <- coxph(
  formula = surv_obj ~ Drug + Stage + Edema,
  data = cirrhosis_surv
)
summary(cox_model)
Call:
coxph(formula = surv_obj ~ Drug + Stage + Edema, data = cirrhosis_surv)

  n= 312, number of events= 125 

               coef exp(coef) se(coef)      z Pr(>|z|)    
DrugPlacebo -0.1393    0.8700   0.1833 -0.760  0.44732    
Stage2       1.6081    4.9935   1.0317  1.559  0.11906    
Stage3       2.0246    7.5734   1.0143  1.996  0.04592 *  
Stage4       2.8305   16.9531   1.0126  2.795  0.00519 ** 
EdemaS       0.6093    1.8392   0.2710  2.248  0.02455 *  
EdemaY       2.0281    7.5996   0.2707  7.492 6.78e-14 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

            exp(coef) exp(-coef) lower .95 upper .95
DrugPlacebo     0.870    1.14942    0.6075     1.246
Stage2          4.993    0.20026    0.6610    37.721
Stage3          7.573    0.13204    1.0374    55.288
Stage4         16.953    0.05899    2.3299   123.359
EdemaS          1.839    0.54371    1.0813     3.128
EdemaY          7.600    0.13159    4.4707    12.918

Concordance= 0.755  (se = 0.022 )
Likelihood ratio test= 94.65  on 6 df,   p=<2e-16
Wald test            = 108.3  on 6 df,   p=<2e-16
Score (logrank) test = 161.8  on 6 df,   p=<2e-16
# Box plot of cholesterol by sex (ggpubr) with the group-comparison p-value
# added by stat_compare_means() using its default settings.
ggboxplot(
  data = cirrhosis_surv,
  x = "Sex",
  y = "Cholesterol",
  color = "Sex"
) +
  stat_compare_means()

# The same comparison built with plain ggplot2: box plot of cholesterol by
# sex, legend hidden, and the significance symbol placed between the boxes.
# after_stat(p.signif) replaces the deprecated `..p.signif..` notation for
# referring to a variable computed by the stat.
ggplot(cirrhosis_surv, aes(Sex, Cholesterol, color = Sex)) +
  geom_boxplot() +
  theme(legend.position = "none") +
  stat_compare_means(
    aes(label = after_stat(p.signif)),
    label.y = 1500,
    label.x = 1.5
  )