# Setup SAS run in R Markdown
require(SASmarkdown)
sasexe <- "C:/Program Files/SASHome/SASFoundation/9.4/sas.exe"
sasopts <- "-nosplash -ls 75"
options(digits = 4)
library(tidyverse) #%>%
library(foreign) #read.spss
library(survival) #survfitThe dataset was provided in Chaper 4, Machine Learning in Medicine-Cookbook 1 (Cleophas and Zwinderman (2014)). We can get the original data in SPSS SAV format Coxoutcomeprediction.sav) from the website; published year: 2014; ISBN number: 978-3-319-04180-3
proc import datafile = "&pathfile./Coxoutcomeprediction.sav"
out= Coxoutcomeprediction;
run;
proc format;
value tx 0 = 'No'
1 = 'Yes';
value ev 0 = 'Occured'
1 = 'Censored';
run;
data Coxoutcomeprediction;
set Coxoutcomeprediction;
format treatment tx.
event ev.
followupmonth 2.;
run;
proc contents data=Coxoutcomeprediction;
run;
## The CONTENTS Procedure
##
## Data Set Name WORK.COXOUTCOMEPREDICTION Observations 60
## Member Type DATA Variables 4
## Engine V9 Indexes 0
## Created 11/17/2020 00:39:11 Observation Length 32
## Last Modified 11/17/2020 00:39:11 Deleted Observations 0
## Protection Compressed NO
## Data Set Type Sorted NO
## Label
## Data Representation WINDOWS_64
## Encoding wlatin1 Western (Windows)
##
##
## Engine/Host Dependent Information
##
## Data Set Page Size 65536
## Number of Data Set Pages 1
## First Data Page 1
## Max Obs per Page 2039
## Obs in First Data Page 60
## Number of Data Set Repairs 0
## ExtendObsCounter YES
## Filename C:\Users\minhh\AppData\Local\Temp\SAS
## Temporary
## Files\_TD72980_HN-LAPTOP_\coxoutcomeprediction.
## sas7bdat
## Release Created 9.0401M3
## Host Created X64_8PRO
##
##
## Alphabetic List of Variables and Attributes
##
## # Variable Type Len Format Label
##
## 4 age Num 8 F8.2 age
## 2 event Num 8 EV. event
## 1 followupmonth Num 8 2. followupmonth
## 3 treatment Num 8 TX. treatment
proc print data=Coxoutcomeprediction (obs=10);
run;
## Obs followupmonth event treatment age
##
## 1 1 Censored No 65.00
## 2 1 Censored No 66.00
## 3 2 Censored No 73.00
## 4 2 Censored No 91.00
## 5 2 Censored No 86.00
## 6 2 Censored No 87.00
## 7 2 Censored No 54.00
## 8 2 Censored No 66.00
## 9 2 Censored No 64.00
## 10 3 Occured No 62.00
ODS GRAPHICS ON;
proc lifetest data=Coxoutcomeprediction PLOTS=(S(TEST) LLS);
time followupmonth*event(0);
strata treatment;
run;
ODS GRAPHICS OFF;
## The LIFETEST Procedure
##
## Stratum 1: treatment = No
##
## Product-Limit Survival Estimates
##
## Survival
## Standard Number Number
## followupmonth Survival Failure Error Failed Left
##
## 0.0000 1.0000 0 0 0 30
## 1.0000 . . . 1 29
## 1.0000 0.9333 0.0667 0.0455 2 28
## 2.0000 . . . 3 27
## 2.0000 . . . 4 26
## 2.0000 . . . 5 25
## 2.0000 . . . 6 24
## 2.0000 . . . 7 23
## 2.0000 . . . 8 22
## 2.0000 0.7000 0.3000 0.0837 9 21
## 3.0000* . . . 9 20
## 4.0000 0.6650 0.3350 0.0865 10 19
## 5.0000 0.6300 0.3700 0.0887 11 18
## 6.0000 . . . 12 17
## 6.0000 0.5600 0.4400 0.0917 13 16
## 7.0000 0.5250 0.4750 0.0924 14 15
## 9.0000 . . . 15 14
## 9.0000 0.4550 0.5450 0.0924 16 13
## 11.0000 0.4200 0.5800 0.0917 17 12
## 12.0000 0.3850 0.6150 0.0905 18 11
## 14.0000 0.3500 0.6500 0.0887 19 10
## 16.0000 0.3150 0.6850 0.0865 20 9
## 17.0000 0.2800 0.7200 0.0837 21 8
## 18.0000 0.2450 0.7550 0.0802 22 7
## 30.0000* . . . 22 6
## 30.0000* . . . 22 5
## 30.0000* . . . 22 4
## 30.0000* . . . 22 3
## 30.0000* . . . 22 2
## 30.0000* . . . 22 1
## 30.0000* 0.2450 0.7550 . 22 0
##
## NOTE: The marked survival times are censored observations.
##
##
## Summary Statistics for Time Variable followupmonth
##
## Quartile Estimates
##
## Point 95% Confidence Interval
## Percent Estimate Transform [Lower Upper)
##
## 75 18.0000 LOGLOG 11.0000 .
## 50 9.0000 LOGLOG 4.0000 16.0000
## 25 2.0000 LOGLOG 2.0000 6.0000
##
##
## Standard
## Mean Error
##
## 9.6333 1.2732
##
## NOTE: The mean survival time and its standard error were underestimated
## because the largest observation was censored and the estimation was
## restricted to the largest event time.
##
##
##
## The LIFETEST Procedure
##
## Stratum 2: treatment = Yes
##
## Product-Limit Survival Estimates
##
## Survival
## Standard Number Number
## followupmonth Survival Failure Error Failed Left
##
## 0.0000 1.0000 0 0 0 30
## 16.0000 . . . 1 29
## 16.0000 0.9333 0.0667 0.0455 2 28
## 17.0000 0.9000 0.1000 0.0548 3 27
## 18.0000 0.8667 0.1333 0.0621 4 26
## 19.0000 . . . 5 25
## 19.0000 0.8000 0.2000 0.0730 6 24
## 20.0000 0.7667 0.2333 0.0772 7 23
## 21.0000 0.7333 0.2667 0.0807 8 22
## 22.0000 0.7000 0.3000 0.0837 9 21
## 22.0000* . . . 9 20
## 23.0000 0.6650 0.3350 0.0865 10 19
## 24.0000 0.6300 0.3700 0.0887 11 18
## 26.0000 0.5950 0.4050 0.0905 12 17
## 27.0000 0.5600 0.4400 0.0917 13 16
## 28.0000 . . . 14 15
## 28.0000 0.4900 0.5100 0.0926 15 14
## 28.0000* . . . 15 13
## 29.0000 . . . 16 12
## 29.0000 . . . 17 11
## 29.0000 0.3769 0.6231 0.0914 18 10
## 30.0000* . . . 18 9
## 30.0000* . . . 18 8
## 30.0000* . . . 18 7
## 30.0000* . . . 18 6
## 30.0000* . . . 18 5
## 30.0000* . . . 18 4
## 30.0000* . . . 18 3
## 30.0000* . . . 18 2
## 30.0000* . . . 18 1
## 30.0000* 0.3769 0.6231 . 18 0
##
## NOTE: The marked survival times are censored observations.
##
##
## Summary Statistics for Time Variable followupmonth
##
## Quartile Estimates
##
## Point 95% Confidence Interval
## Percent Estimate Transform [Lower Upper)
##
## 75 . LOGLOG 29.0000 .
## 50 28.0000 LOGLOG 23.0000 .
## 25 21.0000 LOGLOG 17.0000 26.0000
##
##
## Standard
## Mean Error
##
## 25.2700 0.8829
##
## NOTE: The mean survival time and its standard error were underestimated
## because the largest observation was censored and the estimation was
## restricted to the largest event time.
##
##
## Summary of the Number of Censored and Uncensored Values
##
## Percent
## Stratum treatment Total Failed Censored Censored
##
## 1 No 30 22 8 26.67
## 2 Yes 30 18 12 40.00
## ----------------------------------------------------------------
## Total 60 40 20 33.33
##
##
##
## The LIFETEST Procedure
##
## Testing Homogeneity of Survival Curves for followupmonth over Strata
##
##
## Rank Statistics
##
## treatment Log-Rank Wilcoxon
##
## No 8.6349 520.00
## Yes -8.6349 -520.00
##
##
## Covariance Matrix for the Log-Rank Statistics
##
## treatment No Yes
##
## No 8.17046 -8.17046
## Yes -8.17046 8.17046
##
##
## Covariance Matrix for the Wilcoxon Statistics
##
## treatment No Yes
##
## No 15272.0 -15272.0
## Yes -15272.0 15272.0
##
##
## Test of Equality over Strata
##
## Pr >
## Test Chi-Square DF Chi-Square
##
## Log-Rank 9.1257 1 0.0025
## Wilcoxon 17.7057 1 <.0001
## -2Log(LR) 8.7203 1 0.0031
proc phreg data=Coxoutcomeprediction;
class treatment(ref=first);
model followupmonth*event(0) = treatment age/TIES=EFRON;
run;
## The PHREG Procedure
##
## Model Information
##
## Data Set WORK.COXOUTCOMEPREDICTION
## Dependent Variable followupmonth followupmonth
## Censoring Variable event event
## Censoring Value(s) 0
## Ties Handling EFRON
##
##
## Number of Observations Read 60
## Number of Observations Used 60
##
##
## Class Level Information
##
## Design
## Class Value Variables
##
## treatment No 0
## Yes 1
##
##
## Summary of the Number of Event and Censored Values
##
## Percent
## Total Event Censored Censored
##
## 60 40 20 33.33
##
##
## Convergence Status
##
## Convergence criterion (GCONV=1E-8) satisfied.
##
##
## Model Fit Statistics
##
## Without With
## Criterion Covariates Covariates
##
## -2 LOG L 289.629 275.233
## AIC 289.629 279.233
## SBC 289.629 282.611
##
##
## Testing Global Null Hypothesis: BETA=0
##
## Test Chi-Square DF Pr > ChiSq
##
## Likelihood Ratio 14.3956 2 0.0007
## Score 14.7743 2 0.0006
## Wald 13.9329 2 0.0009
##
##
## Type 3 Tests
##
## Wald
## Effect DF Chi-Square Pr > ChiSq
##
## treatment 1 5.9694 0.0146
## age 1 5.8150 0.0159
##
##
## Analysis of Maximum Likelihood Estimates
##
## Parameter Standard
## Parameter DF Estimate Error Chi-Square Pr > ChiSq
##
## treatment Yes 1 -0.80685 0.33024 5.9694 0.0146
## age 1 0.02888 0.01198 5.8150 0.0159
##
## Analysis of Maximum Likelihood Estimates
##
## Hazard
## Parameter Ratio Label
##
## treatment Yes 0.446 treatment Yes
## age 1.029 age
proc phreg data=Coxoutcomeprediction noprint;
model followupmonth*event(0) = age;
strata treatment;
baseline out=temp survival=survival;
run;
data temp ; set temp;
loglogs=log(-log(survival));
logtime=log(followupmonth);
run;
symbol l=1 i=join v=dot;
proc gplot data=temp;
plot loglogs*followupmonth=treatment /vaxis=axis1 haxis=axis2 legend=legend1;
axis1 order=(-5 to 0 by 1)
label=(f=swiss h=2 a=90 'Log-minus-log survival')
value=(f=swiss h=2);
axis2 order=(0 to 60 by 6)
label=(f=swiss h=2 'Months')
value=(f=swiss h=2);
legend1 label=(h=2 'Treatment =')
value=(f=swiss h=2);
run;
proc phreg data=Coxoutcomeprediction;
model followupmonth*event(0) = treatment age treatmentfollowupmonth;
treatmentfollowupmonth=treatment*followupmonth;
run;
## The PHREG Procedure
##
## Model Information
##
## Data Set WORK.COXOUTCOMEPREDICTION
## Dependent Variable followupmonth followupmonth
## Censoring Variable event event
## Censoring Value(s) 0
## Ties Handling BRESLOW
##
##
## Number of Observations Read 60
## Number of Observations Used 60
##
##
## Summary of the Number of Event and Censored Values
##
## Percent
## Total Event Censored Censored
##
## 60 40 20 33.33
##
##
## Convergence Status
##
## Convergence criterion (GCONV=1E-8) satisfied.
##
##
## Model Fit Statistics
##
## Without With
## Criterion Covariates Covariates
##
## -2 LOG L 291.232 244.498
## AIC 291.232 250.498
## SBC 291.232 255.565
##
##
## Testing Global Null Hypothesis: BETA=0
##
## Test Chi-Square DF Pr > ChiSq
##
## Likelihood Ratio 46.7343 3 <.0001
## Score 32.1967 3 <.0001
## Wald 8.6364 3 0.0345
##
##
## Analysis of Maximum Likelihood Estimates
##
## Parameter Standard
## Parameter DF Estimate Error Chi-Square Pr > ChiSq
##
## treatment 1 -13.86685 7.18281 3.7271 0.0535
## age 1 0.02333 0.01150 4.1139 0.0425
## treatmentfollowupmon 1 0.78493 0.42440 3.4207 0.0644
##
## Analysis of Maximum Likelihood Estimates
##
## Hazard
## Parameter Ratio Label
##
## treatment 0.000 treatment
## age 1.024 age
## treatmentfollowupmon 2.192
# load data and reshape
df<-read.spss("Data/Coxoutcomeprediction.sav",to.data.frame = T,use.value.labels = TRUE) %>%
as_tibble() %>%
mutate(., Event = factor(.$event, levels = c(1,0), labels = c("Occured","Censored")), event = (.$event == 1))## re-encoding from CP1252
df$treatment <- relevel(factor(df$treatment, levels = c(0,1), labels = c("No","Yes")), ref = "No")
df| followupmonth | event | treatment | age | Event |
|---|---|---|---|---|
| 1 | TRUE | No | 65 | Occured |
| 1 | TRUE | No | 66 | Occured |
| 2 | TRUE | No | 73 | Occured |
| 2 | TRUE | No | 91 | Occured |
| 2 | TRUE | No | 86 | Occured |
| 2 | TRUE | No | 87 | Occured |
| 2 | TRUE | No | 54 | Occured |
| 2 | TRUE | No | 66 | Occured |
| 2 | TRUE | No | 64 | Occured |
| 3 | FALSE | No | 62 | Censored |
| 4 | TRUE | No | 57 | Occured |
| 5 | TRUE | No | 85 | Occured |
| 6 | TRUE | No | 85 | Occured |
| 6 | TRUE | No | 76 | Occured |
| 7 | TRUE | No | 76 | Occured |
| 9 | TRUE | No | 76 | Occured |
| 9 | TRUE | No | 65 | Occured |
| 11 | TRUE | No | 84 | Occured |
| 12 | TRUE | No | 34 | Occured |
| 14 | TRUE | No | 45 | Occured |
| 16 | TRUE | No | 56 | Occured |
| 17 | TRUE | No | 67 | Occured |
| 18 | TRUE | No | 86 | Occured |
| 30 | FALSE | No | 75 | Censored |
| 30 | FALSE | No | 65 | Censored |
| 30 | FALSE | No | 54 | Censored |
| 30 | FALSE | No | 46 | Censored |
| 30 | FALSE | No | 54 | Censored |
| 30 | FALSE | No | 75 | Censored |
| 30 | FALSE | No | 56 | Censored |
| 30 | FALSE | Yes | 56 | Censored |
| 30 | FALSE | Yes | 53 | Censored |
| 30 | FALSE | Yes | 34 | Censored |
| 30 | FALSE | Yes | 35 | Censored |
| 30 | FALSE | Yes | 37 | Censored |
| 30 | FALSE | Yes | 65 | Censored |
| 30 | FALSE | Yes | 45 | Censored |
| 30 | FALSE | Yes | 66 | Censored |
| 30 | FALSE | Yes | 55 | Censored |
| 30 | FALSE | Yes | 88 | Censored |
| 29 | TRUE | Yes | 67 | Occured |
| 29 | TRUE | Yes | 56 | Occured |
| 29 | TRUE | Yes | 54 | Occured |
| 28 | FALSE | Yes | 57 | Censored |
| 28 | TRUE | Yes | 57 | Occured |
| 28 | TRUE | Yes | 76 | Occured |
| 27 | TRUE | Yes | 67 | Occured |
| 26 | TRUE | Yes | 66 | Occured |
| 24 | TRUE | Yes | 56 | Occured |
| 23 | TRUE | Yes | 66 | Occured |
| 22 | TRUE | Yes | 84 | Occured |
| 22 | FALSE | Yes | 56 | Censored |
| 21 | TRUE | Yes | 46 | Occured |
| 20 | TRUE | Yes | 45 | Occured |
| 19 | TRUE | Yes | 76 | Occured |
| 19 | TRUE | Yes | 65 | Occured |
| 18 | TRUE | Yes | 45 | Occured |
| 17 | TRUE | Yes | 76 | Occured |
| 16 | TRUE | Yes | 56 | Occured |
| 16 | TRUE | Yes | 45 | Occured |
Y <- Surv(df$followupmonth, df$event==1)
kmfit1 <- survfit(Y ~ 1)
summary(kmfit1)## Call: survfit(formula = Y ~ 1)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 60 2 0.967 0.0232 0.922 1.000
## 2 58 7 0.850 0.0461 0.764 0.945
## 4 50 1 0.833 0.0482 0.744 0.933
## 5 49 1 0.816 0.0501 0.723 0.920
## 6 48 2 0.782 0.0535 0.684 0.894
## 7 46 1 0.765 0.0550 0.665 0.881
## 9 45 2 0.731 0.0575 0.626 0.853
## 11 43 1 0.714 0.0587 0.608 0.839
## 12 42 1 0.697 0.0597 0.589 0.824
## 14 41 1 0.680 0.0606 0.571 0.810
## 16 40 3 0.629 0.0628 0.517 0.765
## 17 37 2 0.595 0.0638 0.482 0.734
## 18 35 2 0.561 0.0646 0.448 0.703
## 19 33 2 0.527 0.0650 0.414 0.671
## 20 31 1 0.510 0.0651 0.397 0.655
## 21 30 1 0.493 0.0651 0.381 0.639
## 22 29 1 0.476 0.0650 0.364 0.622
## 23 27 1 0.458 0.0650 0.347 0.605
## 24 26 1 0.441 0.0648 0.330 0.588
## 26 25 1 0.423 0.0646 0.314 0.571
## 27 24 1 0.405 0.0642 0.297 0.553
## 28 23 2 0.370 0.0633 0.265 0.518
## 29 20 3 0.315 0.0614 0.215 0.461
plot(kmfit1, main = "Plot of Survival Curve", xlab = "Length of Survival", ylab = "Proportion of Individuals who have Survived")kmfit2 <-survfit(Y ~ df$treatment)
summary(kmfit2)## Call: survfit(formula = Y ~ df$treatment)
##
## df$treatment=No
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 30 2 0.933 0.0455 0.848 1.000
## 2 28 7 0.700 0.0837 0.554 0.885
## 4 20 1 0.665 0.0865 0.515 0.858
## 5 19 1 0.630 0.0887 0.478 0.830
## 6 18 2 0.560 0.0917 0.406 0.772
## 7 16 1 0.525 0.0924 0.372 0.741
## 9 15 2 0.455 0.0924 0.306 0.677
## 11 13 1 0.420 0.0917 0.274 0.644
## 12 12 1 0.385 0.0905 0.243 0.610
## 14 11 1 0.350 0.0887 0.213 0.575
## 16 10 1 0.315 0.0865 0.184 0.540
## 17 9 1 0.280 0.0837 0.156 0.503
## 18 8 1 0.245 0.0802 0.129 0.465
##
## df$treatment=Yes
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 16 30 2 0.933 0.0455 0.848 1.000
## 17 28 1 0.900 0.0548 0.799 1.000
## 18 27 1 0.867 0.0621 0.753 0.997
## 19 26 2 0.800 0.0730 0.669 0.957
## 20 24 1 0.767 0.0772 0.629 0.934
## 21 23 1 0.733 0.0807 0.591 0.910
## 22 22 1 0.700 0.0837 0.554 0.885
## 23 20 1 0.665 0.0865 0.515 0.858
## 24 19 1 0.630 0.0887 0.478 0.830
## 26 18 1 0.595 0.0905 0.442 0.802
## 27 17 1 0.560 0.0917 0.406 0.772
## 28 16 2 0.490 0.0926 0.338 0.710
## 29 13 3 0.377 0.0914 0.234 0.606
plot(kmfit2, lty = c("solid", "dashed"), col=c("black","grey"),
xlab="survival time in months",ylab="survival probabilities")
legend("topright", c("Non-Treatment","Treatment"), lty=c("solid","dashed"), col=c("black","grey"))survdiff(Y ~ df$treatment)## Call:
## survdiff(formula = Y ~ df$treatment)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## df$treatment=No 30 22 13.4 5.58 9.13
## df$treatment=Yes 30 18 26.6 2.80 9.13
##
## Chisq= 9.1 on 1 degrees of freedom, p= 0.003
plot(kmfit2,fun="cloglog",xlab="time in months using logarithmic
scale",ylab="log-log survival", main="log-log curves by clinic")coxph(Y ~ treatment + age, data = df, method="efron")## Call:
## coxph(formula = Y ~ treatment + age, data = df, method = "efron")
##
## coef exp(coef) se(coef) z p
## treatmentYes -0.81 0.45 0.33 -2 0.01
## age 0.03 1.03 0.01 2 0.02
##
## Likelihood ratio test=14 on 2 df, p=7e-04
## n= 60, number of events= 40
#coxph(Y ~ strata(treatment) + age, data = df, method="efron")
#summary(coxph(Y ~ strata(treatment) + age, data = df, method="efron"))Cleophas, Ton J., and Aeilko H. Zwinderman. 2014. Machine Learning in Medicine - Cookbook. Book. 2014th ed. SpringerBriefs in Statistics. Cham: Springer International Publishing AG. doi:10.1007/978-3-319-04181-0.