#install.packages("survminer")
# install.packages("survival")
library(survival)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
data(ovarian)
glimpse(ovarian)
## Rows: 26
## Columns: 6
## $ futime <dbl> 59, 115, 156, 421, 431, 448, 464, 475, 477, 563, 638, 744,...
## $ fustat <dbl> 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ age <dbl> 72.3315, 74.4932, 66.4658, 53.3644, 50.3397, 56.4301, 56.9...
## $ resid.ds <dbl> 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 2...
## $ rx <dbl> 1, 1, 1, 2, 1, 1, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 1, 1, 2, 2...
## $ ecog.ps <dbl> 1, 1, 2, 1, 1, 2, 2, 2, 1, 2, 2, 1, 2, 1, 1, 2, 2, 1, 1, 1...
Read and attach data
help(ovarian)
## starting httpd help server ... done
Description
hist(ovarian$age)
ovarian <- ovarian %>% mutate(age_group = ifelse(age >=50, "above50", "below50"))
head(ovarian$age_group)
## [1] "above50" "above50" "above50" "above50" "above50" "above50"
ovarian$age_group <- factor(ovarian$age_group)
head(ovarian$age_group)
## [1] above50 above50 above50 above50 above50 above50
## Levels: above50 below50
bad
ovarian$rx <- factor(ovarian$rx,
levels = c("1", "2"),
labels = c("treatment-A", "treatment-B"))
ovarian$resid.ds <- factor(ovarian$resid.ds,
levels = c("1", "2"),
labels = c("no_residual", "yes_residual"))
ovarian$ecog.ps <- factor(ovarian$ecog.ps,
levels = c("1", "2"),
labels = c("good", "bad"))
hypothetical censoring variable would = 1 - fustat
glimpse(ovarian)
## Rows: 26
## Columns: 7
## $ futime <dbl> 59, 115, 156, 421, 431, 448, 464, 475, 477, 563, 638, 744...
## $ fustat <dbl> 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ age <dbl> 72.3315, 74.4932, 66.4658, 53.3644, 50.3397, 56.4301, 56....
## $ resid.ds <fct> yes_residual, yes_residual, yes_residual, yes_residual, y...
## $ rx <fct> treatment-A, treatment-A, treatment-A, treatment-B, treat...
## $ ecog.ps <fct> good, good, bad, good, good, bad, bad, bad, good, bad, ba...
## $ age_group <fct> above50, above50, above50, above50, above50, above50, abo...
time<-ovarian$futime
#event<-ovarian$resid.ds
event<-(1-ovarian$fustat)
group1<-ovarian$age_group
group2<-ovarian$rx
x=cbind(ovarian$age,ovarian$rx)
Hazard Function
Nelson-Aalen estimator of the cumulative hazard function
The Kaplan-Meier estimator of the survival function – take the ratios of those without events over those at risk and multiply that over time
Kaplan-Meier non-parametric analysis
Where n are the total time slots and d is death or event.
kmsurvival <- survfit(Surv(time,event) ~ 1)
summary(kmsurvival)
## Call: survfit(formula = Surv(time, event) ~ 1)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 377 19 1 0.9474 0.0512 0.8521 1.000
## 421 18 1 0.8947 0.0704 0.7669 1.000
## 448 16 1 0.8388 0.0854 0.6871 1.000
## 477 13 1 0.7743 0.1003 0.6007 0.998
## 744 10 1 0.6969 0.1164 0.5024 0.967
## 769 9 1 0.6194 0.1266 0.4150 0.925
## 770 8 1 0.5420 0.1323 0.3359 0.875
## 803 7 1 0.4646 0.1342 0.2637 0.818
## 855 6 1 0.3871 0.1323 0.1982 0.756
## 1040 5 1 0.3097 0.1265 0.1391 0.690
## 1106 4 1 0.2323 0.1162 0.0872 0.619
## 1129 3 1 0.1549 0.1000 0.0437 0.549
## 1206 2 1 0.0774 0.0741 0.0119 0.506
## 1227 1 1 0.0000 NaN NA NA
plot(kmsurvival, xlab="Time", ylab="Survival Probability",col=c("blue","red","green"))
kmsurvival1 <- survfit(Surv(time, event) ~ group1)
summary(kmsurvival1)
## Call: survfit(formula = Surv(time, event) ~ group1)
##
## group1=above50
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 377 14 1 0.929 0.0688 0.8030 1.000
## 421 13 1 0.857 0.0935 0.6921 1.000
## 448 11 1 0.779 0.1129 0.5866 1.000
## 477 8 1 0.682 0.1344 0.4633 1.000
## 744 5 1 0.545 0.1626 0.3041 0.978
## 769 4 1 0.409 0.1698 0.1814 0.923
## 770 3 1 0.273 0.1588 0.0871 0.854
## 1129 2 1 0.136 0.1249 0.0227 0.821
## 1227 1 1 0.000 NaN NA NA
##
## group1=below50
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 803 5 1 0.8 0.179 0.5161 1
## 855 4 1 0.6 0.219 0.2933 1
## 1040 3 1 0.4 0.219 0.1367 1
## 1106 2 1 0.2 0.179 0.0346 1
## 1206 1 1 0.0 NaN NA NA
plot(kmsurvival1,xlab="Time", ylab="Survival Probability",col=c("blue","red"))
legend("left",
c("Above 50","Below 50"),
fill=c("red","blue"))
## Result of Non parametric Survival Analysis by age groups:
kmsurvival2 <- survfit(Surv(time, event) ~ group2)
plot(kmsurvival2,xlab="Time", ylab="Survival Probability",col=c("blue","red"))
legend("left",
c("A","B"),
fill=c("red","blue"))
### Result of Non parametric Survival Analysis by rx groups #### The above result shows that at some time intervals,group A has more surival prbaibility than group B and also at some other stime intervals its the vice versa situation
nasurvival <- survfit(coxph(Surv(time,event)~1), type="aalen")
summary(nasurvival)
## Call: survfit(formula = coxph(Surv(time, event) ~ 1), type = "aalen")
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 377 19 1 0.9487 0.0499 0.85574 1.000
## 421 18 1 0.8975 0.0687 0.77246 1.000
## 448 16 1 0.8431 0.0833 0.69465 1.000
## 477 13 1 0.7807 0.0978 0.61077 0.998
## 744 10 1 0.7064 0.1132 0.51598 0.967
## 769 9 1 0.6321 0.1233 0.43131 0.926
## 770 8 1 0.5578 0.1292 0.35427 0.878
## 803 7 1 0.4836 0.1316 0.28367 0.824
## 855 6 1 0.4093 0.1306 0.21900 0.765
## 1040 5 1 0.3351 0.1262 0.16019 0.701
## 1106 4 1 0.2610 0.1180 0.10761 0.633
## 1129 3 1 0.1870 0.1050 0.06220 0.562
## 1206 2 1 0.1134 0.0853 0.02598 0.495
## 1227 1 1 0.0417 0.0522 0.00359 0.485
plot(nasurvival, xlab="Time", ylab="Survival Probability")
#Semi Perimetric Model
note:x=cbind(ovarian\(age,ovarian\)rx)
Cox semi-parametric analysis model
coxph <- coxph(Surv(time,event) ~ x, method="breslow")
summary(coxph)
## Call:
## coxph(formula = Surv(time, event) ~ x, method = "breslow")
##
## n= 26, number of events= 14
##
## coef exp(coef) se(coef) z Pr(>|z|)
## x1 0.04580 1.04686 0.04454 1.028 0.304
## x2 -0.86227 0.42220 0.73753 -1.169 0.242
##
## exp(coef) exp(-coef) lower .95 upper .95
## x1 1.0469 0.9552 0.95934 1.142
## x2 0.4222 2.3685 0.09948 1.792
##
## Concordance= 0.542 (se = 0.106 )
## Likelihood ratio test= 1.56 on 2 df, p=0.5
## Wald test = 1.58 on 2 df, p=0.5
## Score (logrank) test = 1.6 on 2 df, p=0.4
Estimation of Paremetric Model
The 1.0469 value of age shows that the unit inrease in the “age” is associated with 4.7% increase in the hazard rate Note : The calculation is shown here =>(1.0469-1 => 0.0469*100 => 4.7% )
parametric survival models
exponential <- survreg(Surv(time,event) ~ x, dist="exponential")
summary(exponential)
##
## Call:
## survreg(formula = Surv(time, event) ~ x, dist = "exponential")
## Value Std. Error z p
## (Intercept) 6.3719 1.6513 3.86 0.00011
## x1 0.0161 0.0349 0.46 0.64529
## x2 -0.1234 0.6000 -0.21 0.83699
##
## Scale fixed at 1
##
## Exponential distribution
## Loglik(model)= -112.1 Loglik(intercept only)= -112.2
## Chisq= 0.22 on 2 degrees of freedom, p= 0.9
## Number of Newton-Raphson Iterations: 5
## n= 26
weibull <- survreg(Surv(time,event) ~ x, dist="weibull")
summary(weibull)
##
## Call:
## survreg(formula = Surv(time, event) ~ x, dist = "weibull")
## Value Std. Error z p
## (Intercept) 7.3442 0.4450 16.50 < 2e-16
## x1 -0.0183 0.0105 -1.73 0.083
## x2 0.2682 0.1736 1.55 0.122
## Log(scale) -1.3652 0.1918 -7.12 1.1e-12
##
## Scale= 0.255
##
## Weibull distribution
## Loglik(model)= -98.4 Loglik(intercept only)= -99.7
## Chisq= 2.78 on 2 degrees of freedom, p= 0.25
## Number of Newton-Raphson Iterations: 7
## n= 26
loglogistic <- survreg(Surv(time,event) ~ x, dist="loglogistic")
summary(loglogistic)
##
## Call:
## survreg(formula = Surv(time, event) ~ x, dist = "loglogistic")
## Value Std. Error z p
## (Intercept) 7.61771 0.47180 16.15 < 2e-16
## x1 -0.02461 0.00928 -2.65 0.008
## x2 0.21703 0.16385 1.32 0.185
## Log(scale) -1.71981 0.21375 -8.05 8.6e-16
##
## Scale= 0.179
##
## Log logistic distribution
## Loglik(model)= -98.8 Loglik(intercept only)= -101.1
## Chisq= 4.53 on 2 degrees of freedom, p= 0.1
## Number of Newton-Raphson Iterations: 6
## n= 26
Only the exponential model is giving result which is consistant with the Kaplan-Meier survival function ,Nelson-Aalen estimator and COX proportional hazard model showing that older age have more hazard rate (having positive co-efficient values)