1. Generate the Kaplan-Meier estimator of the survival function for this set of 100 patients
library(survival)
fit <- survfit(Surv(time, status,type="right") ~ 1, data = heart)
plot(fit,conf.int=T,mark.time=T)


Kaplan-Meier Plot of the Estimated Survival Function by Gender

##separate curve for male and females
fit <- survfit(Surv(time, status) ~ gender, data=heart)
##comparing survival functions among the different genders
survfit(Surv(time,status)~ gender, data=heart)
## Call: survfit(formula = Surv(time, status) ~ gender, data = heart)
## 
##               records n.max n.start events median 0.95LCL 0.95UCL
## gender=Female      35    35      35     23   1806     841      NA
## gender=Male        65    65      65     28   2624    2012      NA
##plotting it
plot(fit, conf.int=FALSE, mark.time=TRUE,lty=1:3,col=1:2)
legend("bottomleft",legend=c("Male","Female"),
       title="Gender",col=1:2,lty=1:3,bty="n")


Fit a cox regression model predicting the hazard of death with age, bmi and gender as the predictors(and no interaction)

test<-coxph(Surv(time,status) ~ age+bmi+gender, method="efron",data=heart)
test %>%
  summary
## Call:
## coxph(formula = Surv(time, status) ~ age + bmi + gender, data = heart, 
##     method = "efron")
## 
##   n= 100, number of events= 51 
## 
##                coef exp(coef) se(coef)      z Pr(>|z|)   
## age         0.03713   1.03783  0.01272  2.918  0.00352 **
## bmi        -0.07083   0.93162  0.03607 -1.964  0.04956 * 
## genderMale -0.14325   0.86654  0.30604 -0.468  0.63973   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##            exp(coef) exp(-coef) lower .95 upper .95
## age           1.0378     0.9636    1.0123    1.0640
## bmi           0.9316     1.0734    0.8680    0.9999
## genderMale    0.8665     1.1540    0.4756    1.5787
## 
## Concordance= 0.683  (se = 0.043 )
## Rsquare= 0.194   (max possible= 0.985 )
## Likelihood ratio test= 21.54  on 3 df,   p=8.135e-05
## Wald test            = 19.46  on 3 df,   p=0.0002197
## Score (logrank) test = 20.82  on 3 df,   p=0.000115

Table showing the required output

Variable Regression Coefficient Hazard Ratio 95% CI for Hazard Ratio pvalue
bmi -0.07 0.93 (0.87,0.9999) 0.05
age 0.04 1.04 (1.01,1.06) 0.003
genderMale -0.14 0.87 (0.48,1.58) 0.64

Be sure to obtain the test of the proportional hazards assumption for this model

##test of the proportional hazards assumption for this model
cox.zph(test, transform="km", global=TRUE)
##               rho  chisq     p
## age        0.0536 0.1986 0.656
## bmi        0.1420 1.5793 0.209
## genderMale 0.0325 0.0671 0.796
## GLOBAL         NA 1.6627 0.645

Descriptive Statistics For Age

##summarising age by gender
 sum<-heart %>%
  group_by(gender) %>%
   summarise(round(mean(age),digits=2),
          round(median(age),digits=2),round(sd(age),digits=2),round(IQR(age),digits=2)) %>%
    data.frame
colnames(sum)<-c("Gender","Mean","Median","SD","IQR")
sum %>%
  datatable


Descriptive Statistics For BMI

##sumarising bmi by gender
sum2<-heart %>%
  group_by(gender) %>%
  summarise(round(mean(bmi),digits=2),
            round(median(bmi),digits=2),round(sd(bmi),digits=2),round(IQR(age),digits=2))%>%
  data.frame
colnames(sum2)<-c("Gender","Mean","Median","SD","IQR")
sum2 %>%
  datatable