Script to the report on the survival analysis on a set of Cancer survival data.
NOTICE TO MARINERS: Results are commented in a seperate report. If you find this interesting but confusing, email me.
prescript
#If you, like me, run your code in RStudio, this could save you 1.5 seconds :-):
library(rstudioapi)
setwd(dirname(rstudioapi::callFun("getActiveDocumentContext")$path))
script
data0 <- read.csv2("Cancerfaktorer.csv") #read the data
sum0 <- summary(data0) #Have a look.
sum0
## Patnr Time Status PS
## Min. : 1.00 Min. : -73.462 Min. :0.0000 Min. :0.0000
## 1st Qu.: 28.25 1st Qu.: 8.172 1st Qu.:1.0000 1st Qu.:0.0000
## Median : 58.00 Median : 17.338 Median :1.0000 Median :0.0000
## Mean : 64.14 Mean : 44.914 Mean :0.8063 Mean :0.4459
## 3rd Qu.: 96.75 3rd Qu.: 35.384 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :152.00 Max. :3650.238 Max. :1.0000 Max. :1.0000
##
## Stage Age Gender HB.risk
## IIIA: 77 Min. : 42.00 Female:109 Anemia : 21
## IIIB:145 1st Qu.: 56.72 Male :113 No anemia:198
## Median : 62.00 NA's : 3
## Mean : 65.26
## 3rd Qu.: 68.12
## Max. :710.00
## NA's :3
## LPK.risk TPK.risk
## Leukocytosis :123 No thrombocytosis: 96
## No leukocytosis: 95 Thrombocytosis :123
## NA's : 4 NA's : 3
##
##
##
##
write.csv(sum0, "sum0.csv")
#Ok, so we have a problem:
#Maximum Time is 3650 and minimum is -73.
#Values such as these are probably typos and need to be removed not to distort the analysis.
#Same goes for Age, with a maximum of 710.
#Let's have a closer look:
hist(data0$Time, breaks=20)
hist(data0$Age, breaks=20)
#Based on the density, I will suggest the below filtering:
library(dplyr)
data1 <- filter(data0, Time >= 0 & Time < 500 & Age < 110)
#Whitespaces may be problematic, lets strip the factors
data1$HB.risk <- as.factor(gsub(" ", "_", data1$HB.risk, fixed = TRUE))
data1$LPK.risk <- as.factor(gsub(" ", "_", data1$LPK.risk, fixed = TRUE))
data1$TPK.risk <- as.factor(gsub(" ", "_", data1$TPK.risk, fixed = TRUE))
#Inspect:
hist(data1$Time, breaks=20)
hist(data1$Age, breaks=20)
#Inspect:
summary(data1)
## Patnr Time Status PS
## Min. : 1.00 Min. : 0.8993 Min. :0.0000 Min. :0.0000
## 1st Qu.: 28.50 1st Qu.: 8.3778 1st Qu.:1.0000 1st Qu.:0.0000
## Median : 58.00 Median :17.6756 Median :1.0000 Median :0.0000
## Mean : 64.13 Mean :25.4477 Mean :0.8047 Mean :0.4465
## 3rd Qu.: 96.50 3rd Qu.:35.3511 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :152.00 Max. :97.5770 Max. :1.0000 Max. :1.0000
## Stage Age Gender HB.risk
## IIIA: 76 Min. :42.00 Female:105 Anemia : 20
## IIIB:139 1st Qu.:56.84 Male :110 No_anemia:195
## Median :62.00
## Mean :62.36
## 3rd Qu.:68.00
## Max. :81.00
## LPK.risk TPK.risk
## Leukocytosis :120 No_thrombocytosis: 93
## No_leukocytosis: 94 Thrombocytosis :122
## NA's : 1
##
##
##
#Looks ok. We have some NA:s as well, but not so many that they should cause any problems.
#However, since we have had typos in data, let's be extraordinary careful and check for duplicates as well.
#Duplicates are problematic if not handled properly (with a design where time and interval are accounted for),
#since we will reuse information and therefore inflate the sample: Say a patient has no event on both occasions,
#then that information is fully readable the second occasion. The first occasion will be an artificial extra row of data,
#falsely increasing the survival rate.
library(tidyverse)
data1$dup <- duplicated(data1$Patnr)
summary(data1)
## Patnr Time Status PS
## Min. : 1.00 Min. : 0.8993 Min. :0.0000 Min. :0.0000
## 1st Qu.: 28.50 1st Qu.: 8.3778 1st Qu.:1.0000 1st Qu.:0.0000
## Median : 58.00 Median :17.6756 Median :1.0000 Median :0.0000
## Mean : 64.13 Mean :25.4477 Mean :0.8047 Mean :0.4465
## 3rd Qu.: 96.50 3rd Qu.:35.3511 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :152.00 Max. :97.5770 Max. :1.0000 Max. :1.0000
## Stage Age Gender HB.risk
## IIIA: 76 Min. :42.00 Female:105 Anemia : 20
## IIIB:139 1st Qu.:56.84 Male :110 No_anemia:195
## Median :62.00
## Mean :62.36
## 3rd Qu.:68.00
## Max. :81.00
## LPK.risk TPK.risk dup
## Leukocytosis :120 No_thrombocytosis: 93 Mode :logical
## No_leukocytosis: 94 Thrombocytosis :122 FALSE:149
## NA's : 1 TRUE :66
##
##
##
#Oh, Nelly! 66 duplicates. However, by the looks of it, it is a separate patient group producing all the duplicates:
#One patient can only have one age when given the diagnosis and here, they differ between duplicates.
#E.g.: 1:TRUE has Age = 58, and 1:FALSE has age = 68, so they are NOT the same patient.
#This pattern is consistent: Age differs between the duplicates, throughout.
#We should be good to go.
#Nonetheless, lets mark the presumed patient groups for future reference:
data1$group <- seq.int(nrow(data1)) #create a column with sequential numbers.
data1$group <- as.factor(ifelse(data1$group > 144, "B", "A")) #by studying the dataset we know that the sequence of Patnr starts over at 144.
#Since R follows the alphabet, we need to set the reference level for two of the risk factors:
data1$HB.risk = relevel(data1$HB.risk, ref = "No_anemia")
data1$LPK.risk = relevel(data1$LPK.risk, ref = "No_leukocytosis")
#Inspect and export
sum1 <- summary(data1) #Have a look.
sum1
## Patnr Time Status PS
## Min. : 1.00 Min. : 0.8993 Min. :0.0000 Min. :0.0000
## 1st Qu.: 28.50 1st Qu.: 8.3778 1st Qu.:1.0000 1st Qu.:0.0000
## Median : 58.00 Median :17.6756 Median :1.0000 Median :0.0000
## Mean : 64.13 Mean :25.4477 Mean :0.8047 Mean :0.4465
## 3rd Qu.: 96.50 3rd Qu.:35.3511 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :152.00 Max. :97.5770 Max. :1.0000 Max. :1.0000
## Stage Age Gender HB.risk
## IIIA: 76 Min. :42.00 Female:105 No_anemia:195
## IIIB:139 1st Qu.:56.84 Male :110 Anemia : 20
## Median :62.00
## Mean :62.36
## 3rd Qu.:68.00
## Max. :81.00
## LPK.risk TPK.risk dup group
## No_leukocytosis: 94 No_thrombocytosis: 93 Mode :logical A:144
## Leukocytosis :120 Thrombocytosis :122 FALSE:149 B: 71
## NA's : 1 TRUE :66
##
##
##
write.csv(sum1, "sum1.csv")
#Now we are definitely good to go.
library(survival) #This is the main survival package used to compute survival curves and fit cox regressions
library(survminer) #This package will allow us to plot our results
#We will start the analysis by creating a "survival object", which is a product of time and the event.
#the survival object will be used in all subsequent analyses.
surv_object <- Surv(time = data1$Time, event = data1$Status)
surv_object #a plus indicates a censored data point, i.e. the the event (death) has not occurred.
## [1] 28.7802900 5.4209450 35.2854200 53.6509200 7.3264880
## [6] 14.9486700 14.0944600 25.0677600 97.5770000+ 1.2813140
## [11] 97.1170400+ 25.1006200 51.1211500 96.8213600+ 20.6324400
## [16] 58.5790600 8.5092400 3.3182750 30.6529800 34.2012300
## [21] 94.1273100+ 8.8377820 4.4024640 10.5133500 8.3449690
## [26] 4.9609860 7.3264880 9.3634500 23.6221800 14.9158100
## [31] 26.2176600 19.6139600 92.1560600+ 12.4517500 17.8726900
## [36] 7.4579060 11.7618100 7.5893230 33.0513300 6.3737170
## [41] 90.8747400+ 13.2073900 17.7741300 65.8069800 8.1149890
## [46] 20.5667300 4.0082140 8.3778240 38.8665300 15.4086200
## [51] 8.7720740 77.3059500 27.6961000 9.1991790 31.1129400
## [56] 4.1067760 6.8008210 7.8193020 12.9445600 3.1540040
## [61] 39.1622200 86.4394200+ 20.8624200 13.8644800 6.6694050
## [66] 7.6221770 1.5770020 33.5113000 15.5400400 15.8685800
## [71] 14.4558500 24.2792600 84.1724900+ 8.7063660 19.3182800
## [76] 6.2751540 5.0924020 2.8583160 12.8788500 6.9979470
## [81] 36.2381900 82.5626300+ 0.8993056 7.1950720 21.9794700
## [86] 53.3552400 29.0431200 11.9917900 80.9856300+ 80.9527700+
## [91] 20.1396300 23.7535900 80.2628300+ 6.5708420 2.7826389
## [96] 78.8172500+ 5.1252570 13.8973300 29.1088300 6.4722790
## [101] 5.2895280 77.2731000+ 15.0800800 26.1191000 36.0082100
## [106] 6.5379880 16.0985600 75.0061600+ 6.0451750 20.6981500
## [111] 74.5462000+ 6.4394250 74.0862400+ 18.1683800 12.7145800
## [116] 8.3778240 17.6755600 42.7433300 12.7145800 72.4763900+
## [121] 26.1191000 16.3285400 19.9753600 8.4435320 9.0677610
## [126] 33.6427100 26.4476400 36.9281300 14.5872700 25.7905500
## [131] 55.5893200 6.5051340 32.4928100 68.7967100+ 68.6981500+
## [136] 5.7166320 17.6755600 19.8439400 19.7125300 23.6550300
## [141] 22.4722800 35.4168400 23.7535900 11.1375800 49.5000000+
## [146] 5.5000000 7.0000000 48.1000000+ 5.6000000 6.7000000
## [151] 17.0000000 14.3000000 13.1000000 15.8000000 3.7000000
## [156] 7.9000000 13.0000000 46.6000000+ 8.5000000 13.8000000
## [161] 22.4000000 21.5000000 9.1000000 6.3000000 44.5000000+
## [166] 22.5000000 43.8000000+ 43.3000000+ 8.9000000 35.9000000
## [171] 23.2000000 16.3000000 3.3000000 15.8000000 41.9000000+
## [176] 7.2000000 14.9000000 25.0000000 28.1000000 17.9000000
## [181] 41.0000000+ 20.3000000 14.7000000 9.7000000 10.9000000
## [186] 39.1000000+ 39.0000000+ 1.4000000 38.7000000+ 8.5000000
## [191] 22.9000000 38.3000000+ 38.3000000+ 16.4000000 31.1000000
## [196] 38.0000000+ 6.2000000 26.3000000 23.9000000 16.4000000
## [201] 9.8000000 37.1000000+ 36.7000000+ 7.9000000 36.6000000+
## [206] 36.2000000+ 7.9000000 35.9000000+ 6.7000000 3.1000000
## [211] 35.0000000+ 4.4000000 2.5000000 34.3000000+ 14.6000000+
#kaplan-Meier survival estimates for the risk factors of (main) interest.
#Compare survival for HB.risk (Anemia/No anemia):
HBcompare <- survfit(surv_object ~ HB.risk, data = data1)
print(HBcompare)
## Call: survfit(formula = surv_object ~ HB.risk, data = data1)
##
## n events median 0.95LCL 0.95UCL
## HB.risk=No_anemia 195 156 17.9 15.80 22.5
## HB.risk=Anemia 20 17 12.7 8.84 53.7
#Lets print survival curves for both states of LPK.risk (Anemia/No anemia)
ggsurvplot(HBcompare,
pval = TRUE, conf.int = TRUE,
risk.table = "abs_pct", # Add risk table with no and % at risk
risk.table.col = "strata", # Change risk table color by groups
linetype = "strata", # Change line type by groups
surv.median.line = "hv", # Specify median survival
ggtheme = theme_minimal(), # Change ggplot2 theme
palette = "Dark2")
#Test the significance of the differences between the two survival curves:
HB_diff <- survdiff(surv_object ~ HB.risk, data = data1)
HB_diff #Not significant. A problem here is the low number of HB.risk = Anemia.
## Call:
## survdiff(formula = surv_object ~ HB.risk, data = data1)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## HB.risk=No_anemia 195 156 158.3 0.0326 0.385
## HB.risk=Anemia 20 17 14.7 0.3503 0.385
##
## Chisq= 0.4 on 1 degrees of freedom, p= 0.5
#Compare survival for LPK.risk (Leukocytosis/No leukocytosis):
LPKcompare <- survfit(surv_object ~ LPK.risk, data = data1)
print(LPKcompare)
## Call: survfit(formula = surv_object ~ LPK.risk, data = data1)
##
## 1 observation deleted due to missingness
## n events median 0.95LCL 0.95UCL
## LPK.risk=No_leukocytosis 94 72 21.7 17.8 28.8
## LPK.risk=Leukocytosis 120 100 14.7 10.5 19.6
#Let's print survival curves for both states of LPK.risk (Leukocytosis/No leukocytosis)
ggsurvplot(LPKcompare,
pval = TRUE, conf.int = TRUE,
risk.table = "abs_pct", # Add risk table with no and % at risk
risk.table.col = "strata", # Change risk table color by groups
linetype = "strata", # Change line type by groups
surv.median.line = "hv", # Specify median survival
ggtheme = theme_minimal(), # Change ggplot2 theme
palette = "Dark2")
#Test the significance of the differences between the two survival curves:
LPK_diff <- survdiff(surv_object ~ LPK.risk, data = data1)
LPK_diff #Significant at a 2% level.
## Call:
## survdiff(formula = surv_object ~ LPK.risk, data = data1)
##
## n=214, 1 observation deleted due to missingness.
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## LPK.risk=No_leukocytosis 94 72 87.2 2.66 5.43
## LPK.risk=Leukocytosis 120 100 84.8 2.74 5.43
##
## Chisq= 5.4 on 1 degrees of freedom, p= 0.02
#Compare survival for TPK.risk (Thrombocytosis/No thrombocytosis):
TPKcompare <- survfit(surv_object ~ TPK.risk, data = data1)
print(TPKcompare)
## Call: survfit(formula = surv_object ~ TPK.risk, data = data1)
##
## n events median 0.95LCL 0.95UCL
## TPK.risk=No_thrombocytosis 93 68 23.2 17.9 29.0
## TPK.risk=Thrombocytosis 122 105 14.7 12.5 18.2
#Let's print survival curves for both states of TPK.risk (Thrombocytosis/No thrombocytosis)
ggsurvplot(TPKcompare,
pval = TRUE, conf.int = TRUE,
risk.table = "abs_pct", # Add risk table with no and % at risk
risk.table.col = "strata",# Change risk table color by groups
linetype = "strata", # Change line type by groups
surv.median.line = "hv", # Specify median survival
ggtheme = theme_minimal(), # Change ggplot2 theme
palette = "Dark2")
#Test the significance of the differences between the two survival curves:
TPK_diff <- survdiff(surv_object ~ TPK.risk, data = data1)
TPK_diff #Significant at a 0.6% level.
## Call:
## survdiff(formula = surv_object ~ TPK.risk, data = data1)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## TPK.risk=No_thrombocytosis 93 68 86.1 3.81 7.68
## TPK.risk=Thrombocytosis 122 105 86.9 3.78 7.68
##
## Chisq= 7.7 on 1 degrees of freedom, p= 0.006
#Fit a model
cox1 <- coxph(surv_object ~ PS + Stage + Age + Gender + HB.risk + LPK.risk + TPK.risk, data = data1)
summary(cox1) #Interestingly, the significance for the LKP.risk is gone.
## Call:
## coxph(formula = surv_object ~ PS + Stage + Age + Gender + HB.risk +
## LPK.risk + TPK.risk, data = data1)
##
## n= 214, number of events= 172
## (1 observation deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## PS 0.466353 1.594170 0.158433 2.944 0.00324 **
## StageIIIB -0.161875 0.850548 0.163693 -0.989 0.32272
## Age 0.008474 1.008510 0.009591 0.884 0.37694
## GenderMale 0.351796 1.421618 0.159682 2.203 0.02759 *
## HB.riskAnemia 0.010648 1.010705 0.262993 0.040 0.96770
## LPK.riskLeukocytosis 0.154123 1.166634 0.174627 0.883 0.37746
## TPK.riskThrombocytosis 0.395300 1.484830 0.185948 2.126 0.03351 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## PS 1.5942 0.6273 1.1686 2.175
## StageIIIB 0.8505 1.1757 0.6171 1.172
## Age 1.0085 0.9916 0.9897 1.028
## GenderMale 1.4216 0.7034 1.0396 1.944
## HB.riskAnemia 1.0107 0.9894 0.6036 1.692
## LPK.riskLeukocytosis 1.1666 0.8572 0.8285 1.643
## TPK.riskThrombocytosis 1.4848 0.6735 1.0313 2.138
##
## Concordance= 0.622 (se = 0.023 )
## Rsquare= 0.1 (max possible= 0.999 )
## Likelihood ratio test= 22.48 on 7 df, p=0.002
## Wald test = 22.63 on 7 df, p=0.002
## Score (logrank) test = 22.95 on 7 df, p=0.002
#Due to some other cofounding factors now included in the analysis, for sure.
#Let's do the boring stuff first: diagnostics of the model
ggcoxdiagnostics(cox1, type = "deviance",
linear.predictions = FALSE, ggtheme = theme_minimal()) #No apparent structure in the residual.
testPH <- cox.zph(cox1)
testPH
## rho chisq p
## PS -0.0670 0.804 0.36977
## StageIIIB -0.0830 1.226 0.26812
## Age -0.0372 0.233 0.62950
## GenderMale -0.0305 0.162 0.68738
## HB.riskAnemia -0.0451 0.354 0.55209
## LPK.riskLeukocytosis -0.1949 7.195 0.00731
## TPK.riskThrombocytosis 0.0741 1.103 0.29355
## GLOBAL NA 10.646 0.15480
ggcoxzph(testPH, font.main = 10, font.x = 10, font.y = 10, ggtheme = theme_minimal()) #graph.
#Stratification on the problematic variable may solve our dilemma with non-proportional hazard for LKP.risk.
#Lets get a competing model:
cox2 <- coxph(surv_object ~ PS + Stage + Age + Gender + HB.risk + strata(LPK.risk) + TPK.risk, data = data1)
#will output a hazard ratio in the presence of two hazards intrinsic to the levels of LPK.risk
testPH2 <- cox.zph(cox2)
testPH2 #problem solved.
## rho chisq p
## PS -0.0605 0.643 0.423
## StageIIIB -0.0886 1.389 0.239
## Age -0.0285 0.136 0.713
## GenderMale -0.0268 0.123 0.726
## HB.riskAnemia -0.0410 0.290 0.590
## TPK.riskThrombocytosis 0.0682 0.929 0.335
## GLOBAL NA 3.585 0.733
summary(cox2)#However, overall fit not improved...
## Call:
## coxph(formula = surv_object ~ PS + Stage + Age + Gender + HB.risk +
## strata(LPK.risk) + TPK.risk, data = data1)
##
## n= 214, number of events= 172
## (1 observation deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## PS 0.482984 1.620904 0.158648 3.044 0.00233 **
## StageIIIB -0.170637 0.843127 0.163998 -1.040 0.29812
## Age 0.009027 1.009068 0.009583 0.942 0.34621
## GenderMale 0.334367 1.397056 0.159642 2.094 0.03622 *
## HB.riskAnemia 0.018707 1.018883 0.262551 0.071 0.94320
## TPK.riskThrombocytosis 0.406908 1.502167 0.187479 2.170 0.02998 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## PS 1.6209 0.6169 1.1877 2.212
## StageIIIB 0.8431 1.1861 0.6114 1.163
## Age 1.0091 0.9910 0.9903 1.028
## GenderMale 1.3971 0.7158 1.0217 1.910
## HB.riskAnemia 1.0189 0.9815 0.6090 1.705
## TPK.riskThrombocytosis 1.5022 0.6657 1.0402 2.169
##
## Concordance= 0.603 (se = 0.024 )
## Rsquare= 0.078 (max possible= 0.999 )
## Likelihood ratio test= 17.49 on 6 df, p=0.008
## Wald test = 17.66 on 6 df, p=0.007
## Score (logrank) test = 17.82 on 6 df, p=0.007
#The violation of the PH criteria does not worry me much,
#since the overall PH is ok and since LPK.risk is not significant anyway.
#In addition, the slight decrease of hazard is, by looking at the visuals, very modest.
#However, should, for some reason, LPK.risk be the main focus of attention in future research efforts,
#the design of the study need to take into account the probable time-dependence of LPT.risk,
#and the data structure needs to be changed.
#FOr now, I will continue with the first model (cox1).
ggforest(cox1, data = data1) #Hazards per covariate
ggsurvplot(survfit(cox1, data = data1), palette = "Grey",
ggtheme = theme_minimal()) #Overall survival plot, all covariates considered.
#Now, let's do something akin to the kaplan-Meier survival estimates but in the context of the cox regression
#1
HB_df <- with(data1,
data.frame(HB.risk = c("No_anemia", "Anemia"),
Age = rep(mean(Age, na.rm = TRUE), 2),
PS = c(0, 0),
Stage = c("IIIA", "IIIA"),
Gender = c("Female", "Female"),
LPK.risk = c("No_leukocytosis", "No_leukocytosis"),
TPK.risk = c("No_thrombocytosis", "No_thrombocytosis")
)
)
HB_df
## HB.risk Age PS Stage Gender LPK.risk TPK.risk
## 1 No_anemia 62.35902 0 IIIA Female No_leukocytosis No_thrombocytosis
## 2 Anemia 62.35902 0 IIIA Female No_leukocytosis No_thrombocytosis
HB_diff <- survfit(cox1, newdata = HB_df)
ggsurvplot(HB_diff, data = HB_diff, conf.int = TRUE, legend.labs=c("No_anemia", "Anemia"),
ggtheme = theme_minimal(),
palette = "Dark2"
)
#2
LPK_df <- with(data1,
data.frame(LPK.risk = c("No_leukocytosis", "Leukocytosis"),
Age = rep(mean(Age, na.rm = TRUE), 2),
PS = c(0, 0),
Stage = c("IIIA", "IIIA"),
Gender = c("Female", "Female"),
HB.risk = c("No_anemia", "No_anemia"),
TPK.risk = c("No_thrombocytosis", "No_thrombocytosis")
)
)
LPK_df
## LPK.risk Age PS Stage Gender HB.risk TPK.risk
## 1 No_leukocytosis 62.35902 0 IIIA Female No_anemia No_thrombocytosis
## 2 Leukocytosis 62.35902 0 IIIA Female No_anemia No_thrombocytosis
LKP_diff <- survfit(cox1, newdata = LPK_df)
ggsurvplot(LKP_diff, data = LKP_diff, conf.int = TRUE, legend.labs=c("No_leukocytosis", "Leukocytosis"),
ggtheme = theme_minimal(),
palette = "Dark2"
)
#3
TPK_df <- with(data1,
data.frame(TPK.risk = c("No_thrombocytosis", "Thrombocytosis"),
Age = rep(mean(Age, na.rm = TRUE), 2),
PS = c(0, 0),
Stage = c("IIIA", "IIIA"),
Gender = c("Female", "Female"),
HB.risk =c("No_anemia", "No_anemia"),
LPK.risk = c("No_leukocytosis", "No_leukocytosis")
)
)
TPK_df
## TPK.risk Age PS Stage Gender HB.risk LPK.risk
## 1 No_thrombocytosis 62.35902 0 IIIA Female No_anemia No_leukocytosis
## 2 Thrombocytosis 62.35902 0 IIIA Female No_anemia No_leukocytosis
TPK_diff <- survfit(cox1, newdata = TPK_df)
ggsurvplot(TPK_diff, data = TPK_diff, conf.int = TRUE, legend.labs=c("No_thrombocytosis", "Thrombocytosis"),
ggtheme = theme_minimal(),
palette = "Dark2"
)
GMY
## [1] "MYA"