#install.packages("survival")
#install.packages("survminer")
library("survival")
## Warning: package 'survival' was built under R version 3.4.4
library("survminer")
## Warning: package 'survminer' was built under R version 3.4.4
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.4.4
## Loading required package: ggpubr
## Warning: package 'ggpubr' was built under R version 3.4.4
## Loading required package: magrittr
gbcs<-read.table("C://Users/icomb/OneDrive/2017 Fall/Survival Analysis/gbcs.txt",1)
head(gbcs)
##   ID  DateDiag   DateRec DateDeath Age Menopause HormoneTherapy
## 1  1 17aug1984 15apr1988 16nov1990  38         1              1
## 2  2 25apr1985 15mar1989 22oct1990  52         1              1
## 3  3 11oct1984 12apr1988 06oct1988  47         1              1
## 4  4 29jun1984 24nov1984 24nov1984  40         1              1
## 5  5 03jul1984 09aug1989 09aug1989  64         2              2
## 6  6 24jul1984 08nov1989 08nov1989  49         2              2
##   TumorSize.mm. TumorGrade NumberNodes ProgReceptor EstoReceptor TimeRecur
## 1            18          3           5          141          105      1337
## 2            20          1           1           78           14      1420
## 3            30          2           1          422           89      1279
## 4            24          1           3           25           11       148
## 5            19          2           1           19            9      1863
## 6            56          1           3          356           64      1933
##   RecurCensor  TTD Censor
## 1           1 2282      0
## 2           1 2006      0
## 3           1 1456      1
## 4           0  148      0
## 5           0 1863      0
## 6           0 1933      0

Model with 1 variable

The model is ln(HR)=0.364(Hormonetherapy)

hypothesis testing, with a test statistic of 8.4669 and a pvalue of 0.0036, at 5% significance level, we reject the null hypothesis

HR = exp(0.364) = 1.439 The hazard rate for those with hormone therapy 1 is 1.439 times greater than those in hormone therapy 2

model1<-coxph(formula = Surv(TimeRecur,RecurCensor==1)~(HormoneTherapy==1), data=gbcs)
summary(model1)
## Call:
## coxph(formula = Surv(TimeRecur, RecurCensor == 1) ~ (HormoneTherapy == 
##     1), data = gbcs)
## 
##   n= 686, number of events= 299 
## 
##                          coef exp(coef) se(coef)     z Pr(>|z|)   
## HormoneTherapy == 1TRUE 0.364     1.439    0.125 2.911   0.0036 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                         exp(coef) exp(-coef) lower .95 upper .95
## HormoneTherapy == 1TRUE     1.439     0.6949     1.126     1.839
## 
## Concordance= 0.543  (se = 0.015 )
## Rsquare= 0.013   (max possible= 0.995 )
## Likelihood ratio test= 8.82  on 1 df,   p=0.002977
## Wald test            = 8.47  on 1 df,   p=0.003602
## Score (logrank) test = 8.57  on 1 df,   p=0.003425

Plot of the survival curve

fit<-survfit(Surv(TimeRecur,RecurCensor==1)~(HormoneTherapy==1), data=gbcs)
ggsurvplot(fit, data=gbcs, legend.labs=c("Hormone Therapy=1", "Hormone Therapy=2"), pval=TRUE)

Variable Selection

step 1: univariate models

age_mod<-coxph(formula = Surv(TimeRecur,RecurCensor==1)~Age, data=gbcs)
age_mod
## Call:
## coxph(formula = Surv(TimeRecur, RecurCensor == 1) ~ Age, data = gbcs)
## 
##         coef exp(coef) se(coef)     z    p
## Age -0.00448   0.99553  0.00589 -0.76 0.45
## 
## Likelihood ratio test=0.58  on 1 df, p=0.446
## n= 686, number of events= 299
#With a chisq = 0.58 At significance level of 0.15 and p-value of 0.446, Age is not a significant variable.

meno_mod<-coxph(formula = Surv(TimeRecur,RecurCensor==1)~Menopause, data=gbcs)
meno_mod
## Call:
## coxph(formula = Surv(TimeRecur, RecurCensor == 1) ~ Menopause, 
##     data = gbcs)
## 
##             coef exp(coef) se(coef)    z   p
## Menopause 0.0627    1.0647   0.1182 0.53 0.6
## 
## Likelihood ratio test=0.28  on 1 df, p=0.595
## n= 686, number of events= 299
#With a chisq = 0.28 At significance level of 0.15 and p-value of 0.595, Menopause is not a significant variable

TG_mod<-coxph(formula = Surv(TimeRecur,RecurCensor==1)~relevel(as.factor(TumorGrade), ref="3"), data=gbcs)
TG_mod
## Call:
## coxph(formula = Surv(TimeRecur, RecurCensor == 1) ~ relevel(as.factor(TumorGrade), 
##     ref = "3"), data = gbcs)
## 
##                                              coef exp(coef) se(coef)     z
## relevel(as.factor(TumorGrade), ref = "3")1 -1.154     0.315    0.262 -4.41
## relevel(as.factor(TumorGrade), ref = "3")2 -0.282     0.754    0.133 -2.12
##                                                p
## relevel(as.factor(TumorGrade), ref = "3")1 1e-05
## relevel(as.factor(TumorGrade), ref = "3")2 0.034
## 
## Likelihood ratio test=24.2  on 2 df, p=5.47e-06
## n= 686, number of events= 299
#   With a chisq = 20 At significance level of 0.15 and p-value of <0.001, Tumorsize is a significant variable.

TS_mod<-coxph(formula = Surv(TimeRecur,RecurCensor==1)~TumorSize.mm., data=gbcs)
TS_mod
## Call:
## coxph(formula = Surv(TimeRecur, RecurCensor == 1) ~ TumorSize.mm., 
##     data = gbcs)
## 
##                  coef exp(coef) se(coef)    z       p
## TumorSize.mm. 0.01484   1.01495  0.00351 4.23 2.3e-05
## 
## Likelihood ratio test=15.7  on 1 df, p=7.49e-05
## n= 686, number of events= 299
#With a chisq = 15.7 At significance level of 0.15 and p-value of <0.001, Tumorgrade is a significant variable.

step 2: model with TG and TS and HT

mod1<-coxph(formula = Surv(TimeRecur,RecurCensor==1)~(HormoneTherapy==1)+TumorSize.mm.+relevel(as.factor(TumorGrade), ref="3"), data=gbcs)

twologL=-2*mod1$loglik[2]

#dropping Tum0rSize
mod2<-coxph(formula = Surv(TimeRecur,RecurCensor==1)~(HormoneTherapy==1)+relevel(as.factor(TumorGrade), ref="3"), data=gbcs)
twologL_2=-2*mod2$loglik[2]

chisq= twologL_2-twologL
chisq
## [1] 13.987
#df=1. chisq = 3.84 at 0.05. Tumorsize is significant


#droptting Tumorgrade
mod3<-coxph(formula = Surv(TimeRecur,RecurCensor==1)~(HormoneTherapy==1)+TumorSize.mm., data=gbcs)
twologL_3=-2*mod3$loglik[2]

chisq= twologL_3-twologL
chisq
## [1] 20.90137
#df=2, chisq= 5.991 at 0.05. Tumorgrade is significant

#dropping Hormonetherapy
mod4<-coxph(formula = Surv(TimeRecur,RecurCensor==1)~TumorSize.mm.+relevel(as.factor(TumorGrade), ref="3"), data=gbcs)
twologL_4=-2*mod4$loglik[2]

chisq= twologL_4-twologL
chisq
## [1] 7.913646
#df=1, chisq= 3.84 at 0.05. Hormone Therapy is significant.

Step 3; add each variable dropped in step 1

#adding age
mod5<-coxph(formula = Surv(TimeRecur,RecurCensor==1)~Age+(HormoneTherapy==1)+TumorSize.mm.+relevel(as.factor(TumorGrade), ref="3"), data=gbcs)
twologL_5=-2*mod5$loglik[2]

chisq= twologL-twologL_5
chisq
## [1] 0.03535268
#df =1,chisq= 3.84 at 0.05. age not significant


#adding menopause
mod6<-coxph(formula = Surv(TimeRecur,RecurCensor==1)~Menopause+(HormoneTherapy==1)+TumorSize.mm.+relevel(as.factor(TumorGrade), ref="3"), data=gbcs)
twologL_6=-2*mod6$loglik[2]

chisq= twologL-twologL_6
chisq
## [1] 2.006006
# df =1, chisq= 3.84 at 0.05. menopause not significant

final model includes TS, TG, HT

Model with interaction

int_mod<-coxph(formula = Surv(TimeRecur,RecurCensor==1)~(HormoneTherapy==1)+relevel(as.factor(TumorGrade), ref="3")+(HormoneTherapy==1)*relevel(as.factor(TumorGrade), ref="3"), data=gbcs)
summary(int_mod)
## Call:
## coxph(formula = Surv(TimeRecur, RecurCensor == 1) ~ (HormoneTherapy == 
##     1) + relevel(as.factor(TumorGrade), ref = "3") + (HormoneTherapy == 
##     1) * relevel(as.factor(TumorGrade), ref = "3"), data = gbcs)
## 
##   n= 686, number of events= 299 
## 
##                                                                       coef
## HormoneTherapy == 1TRUE                                             0.1305
## relevel(as.factor(TumorGrade), ref = "3")1                         -1.2932
## relevel(as.factor(TumorGrade), ref = "3")2                         -0.4534
## HormoneTherapy == 1TRUE:relevel(as.factor(TumorGrade), ref = "3")1  0.2327
## HormoneTherapy == 1TRUE:relevel(as.factor(TumorGrade), ref = "3")2  0.2813
##                                                                    exp(coef)
## HormoneTherapy == 1TRUE                                               1.1394
## relevel(as.factor(TumorGrade), ref = "3")1                            0.2744
## relevel(as.factor(TumorGrade), ref = "3")2                            0.6355
## HormoneTherapy == 1TRUE:relevel(as.factor(TumorGrade), ref = "3")1    1.2620
## HormoneTherapy == 1TRUE:relevel(as.factor(TumorGrade), ref = "3")2    1.3248
##                                                                    se(coef)
## HormoneTherapy == 1TRUE                                              0.2479
## relevel(as.factor(TumorGrade), ref = "3")1                           0.4587
## relevel(as.factor(TumorGrade), ref = "3")2                           0.2429
## HormoneTherapy == 1TRUE:relevel(as.factor(TumorGrade), ref = "3")1   0.5581
## HormoneTherapy == 1TRUE:relevel(as.factor(TumorGrade), ref = "3")2   0.2899
##                                                                         z
## HormoneTherapy == 1TRUE                                             0.526
## relevel(as.factor(TumorGrade), ref = "3")1                         -2.819
## relevel(as.factor(TumorGrade), ref = "3")2                         -1.867
## HormoneTherapy == 1TRUE:relevel(as.factor(TumorGrade), ref = "3")1  0.417
## HormoneTherapy == 1TRUE:relevel(as.factor(TumorGrade), ref = "3")2  0.970
##                                                                    Pr(>|z|)
## HormoneTherapy == 1TRUE                                             0.59865
## relevel(as.factor(TumorGrade), ref = "3")1                          0.00481
## relevel(as.factor(TumorGrade), ref = "3")2                          0.06197
## HormoneTherapy == 1TRUE:relevel(as.factor(TumorGrade), ref = "3")1  0.67673
## HormoneTherapy == 1TRUE:relevel(as.factor(TumorGrade), ref = "3")2  0.33200
##                                                                      
## HormoneTherapy == 1TRUE                                              
## relevel(as.factor(TumorGrade), ref = "3")1                         **
## relevel(as.factor(TumorGrade), ref = "3")2                         . 
## HormoneTherapy == 1TRUE:relevel(as.factor(TumorGrade), ref = "3")1   
## HormoneTherapy == 1TRUE:relevel(as.factor(TumorGrade), ref = "3")2   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                                                    exp(coef)
## HormoneTherapy == 1TRUE                                               1.1394
## relevel(as.factor(TumorGrade), ref = "3")1                            0.2744
## relevel(as.factor(TumorGrade), ref = "3")2                            0.6355
## HormoneTherapy == 1TRUE:relevel(as.factor(TumorGrade), ref = "3")1    1.2620
## HormoneTherapy == 1TRUE:relevel(as.factor(TumorGrade), ref = "3")2    1.3248
##                                                                    exp(-coef)
## HormoneTherapy == 1TRUE                                                0.8777
## relevel(as.factor(TumorGrade), ref = "3")1                             3.6444
## relevel(as.factor(TumorGrade), ref = "3")2                             1.5736
## HormoneTherapy == 1TRUE:relevel(as.factor(TumorGrade), ref = "3")1     0.7924
## HormoneTherapy == 1TRUE:relevel(as.factor(TumorGrade), ref = "3")2     0.7548
##                                                                    lower .95
## HormoneTherapy == 1TRUE                                               0.7009
## relevel(as.factor(TumorGrade), ref = "3")1                            0.1117
## relevel(as.factor(TumorGrade), ref = "3")2                            0.3948
## HormoneTherapy == 1TRUE:relevel(as.factor(TumorGrade), ref = "3")1    0.4226
## HormoneTherapy == 1TRUE:relevel(as.factor(TumorGrade), ref = "3")2    0.7505
##                                                                    upper .95
## HormoneTherapy == 1TRUE                                               1.8520
## relevel(as.factor(TumorGrade), ref = "3")1                            0.6743
## relevel(as.factor(TumorGrade), ref = "3")2                            1.0230
## HormoneTherapy == 1TRUE:relevel(as.factor(TumorGrade), ref = "3")1    3.7685
## HormoneTherapy == 1TRUE:relevel(as.factor(TumorGrade), ref = "3")2    2.3386
## 
## Concordance= 0.602  (se = 0.017 )
## Rsquare= 0.047   (max possible= 0.995 )
## Likelihood ratio test= 32.8  on 5 df,   p=4.117e-06
## Wald test            = 27.42  on 5 df,   p=4.731e-05
## Score (logrank) test = 29.36  on 5 df,   p=1.97e-05