knitr::opts_chunk$set(echo = TRUE)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.3
library(survival)
library(survminer)
## Warning: package 'survminer' was built under R version 3.5.3
## Loading required package: ggpubr
## Warning: package 'ggpubr' was built under R version 3.5.3
## Loading required package: magrittr
library(ggfortify)
## Warning: package 'ggfortify' was built under R version 3.5.3
This is a survival analysis done on a breast cancer dataset.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Load the clinical table.
# NOTE(review): this is an absolute, machine-specific path; the document only
# knits on a machine where this file exists.
clinical <- readRDS("C:/Users/Dustin and Morgan/Desktop/clinical.rds")

# Recode overall-survival status into the 0/1 event indicator used by Surv():
# 1 = DECEASED (event observed), 0 = LIVING (censored).
# The labels are compared directly instead of going through as.numeric() on
# the raw column: on a factor that returns internal level codes (so the result
# depends on level order), and on a character column it returns only NA.
# Any value other than the two expected labels becomes NA.
os_label <- as.character(clinical$OS_STATUS)
clinical$OS_STATUS <- ifelse(
  os_label == "DECEASED", 1,
  ifelse(os_label == "LIVING", 0, NA_real_)
)
summary(clinical)
## PATIENT_ID OS_MONTHS OS_STATUS
## Length:1904 Min. : 0.00 Min. :0.0000
## Class :character 1st Qu.: 60.83 1st Qu.:0.0000
## Mode :character Median :114.90 Median :1.0000
## Mean :125.03 Mean :0.5793
## 3rd Qu.:184.47 3rd Qu.:1.0000
## Max. :355.20 Max. :1.0000
##
## VITAL_STATUS INTCLUST COHORT AGE_AT_DIAGNOSIS
## Died of Disease :622 8 :289 Min. :1.000 Min. :21.93
## Died of Other Causes:480 3 :282 1st Qu.:1.000 1st Qu.:51.38
## Living :801 4ER+ :244 Median :3.000 Median :61.77
## NA's : 1 10 :219 Mean :2.644 Mean :61.09
## 5 :184 3rd Qu.:3.000 3rd Qu.:70.59
## 7 :182 Max. :5.000 Max. :96.29
## (Other):504
## LATERALITY NPI ER_IHC INFERRED_MENOPAUSAL_STATE
## l :935 Min. :1.000 neg : 429 post:1493
## null:106 1st Qu.:3.046 pos :1445 pre : 411
## r :863 Median :4.042 NA's: 30
## Mean :4.033
## 3rd Qu.:5.040
## Max. :6.360
##
## BREAST_SURGERY CELLULARITY HER2_SNP6
## BREAST CONSERVING: 755 high :939 GAIN : 417
## MASTECTOMY :1127 low :200 LOSS : 100
## null : 22 moderate:711 NEUT :1383
## null : 54 UNDEF: 4
##
##
##
## THREEGENE CLAUDIN_SUBTYPE CHEMOTHERAPY
## ER-/HER2- :290 Basal :199 NO :1508
## ER+/HER2- High Prolif:603 claudin-low:199 YES: 396
## ER+/HER2- Low Prolif :619 Her2 :220
## HER2+ :188 LumA :679
## null :204 LumB :461
## NC : 6
## Normal :140
## HORMONE_THERAPY RADIO_THERAPY HISTOLOGICAL_SUBTYPE
## NO : 730 NO : 767 IDC :1500
## YES:1174 YES:1137 ILC : 141
## IDC+ILC: 87
## IDC-TUB: 67
## IDC-MUC: 42
## IDC-MED: 31
## (Other): 36
This shows the variables in the data set, which include the follow-up time in months (OS_MONTHS) and the status of the patient (OS_STATUS), where 0 is alive and 1 is deceased. Other variables are INTCLUST, cohort, age at diagnosis, laterality (indicating in which breast the cancer was found), NPI (Nottingham Prognostic Index), ER IHC (where positive shows cancer cells grow in response to estrogen), inferred menopausal state, the type of breast surgery performed, cellularity (% of tumor volume occupied by invasive tumor cells), HER2_SNP6 (human epidermal growth factor receptor 2), THREEGENE, claudin subtype, chemotherapy, hormone therapy, radio therapy, and histological subtype.
A Cox proportional hazards model was used to see which of these variables are significant for survival in breast cancer.
# Cox proportional-hazards model of overall survival on all clinical
# covariates in the table; the summary lists one row per coefficient.
coxclinical <- coxph(
  Surv(OS_MONTHS, OS_STATUS) ~
    INTCLUST + COHORT + AGE_AT_DIAGNOSIS + LATERALITY + NPI + ER_IHC +
    INFERRED_MENOPAUSAL_STATE + BREAST_SURGERY + CELLULARITY + HER2_SNP6 +
    THREEGENE + CLAUDIN_SUBTYPE + CHEMOTHERAPY + HORMONE_THERAPY +
    RADIO_THERAPY + HISTOLOGICAL_SUBTYPE,
  data = clinical
)
## Warning in fitter(X, Y, strats, offset, init, control, weights = weights, :
## Loglik converged before variable 39 ; beta may be infinite.
summary(coxclinical)
## Call:
## coxph(formula = Surv(OS_MONTHS, OS_STATUS) ~ INTCLUST + COHORT +
## AGE_AT_DIAGNOSIS + LATERALITY + NPI + ER_IHC + INFERRED_MENOPAUSAL_STATE +
## BREAST_SURGERY + CELLULARITY + HER2_SNP6 + THREEGENE + CLAUDIN_SUBTYPE +
## CHEMOTHERAPY + HORMONE_THERAPY + RADIO_THERAPY + HISTOLOGICAL_SUBTYPE,
## data = clinical)
##
## n= 1874, number of events= 1089
## (30 observations deleted due to missingness)
##
## coef exp(coef)
## INTCLUST10 -0.400162 0.670211
## INTCLUST2 0.092095 1.096469
## INTCLUST3 -0.174030 0.840272
## INTCLUST4ER- -0.231097 0.793663
## INTCLUST4ER+ -0.128344 0.879551
## INTCLUST5 0.551684 1.736175
## INTCLUST6 0.017199 1.017348
## INTCLUST7 -0.194490 0.823255
## INTCLUST8 -0.082124 0.921158
## INTCLUST9 0.058605 1.060356
## COHORT 0.036019 1.036676
## AGE_AT_DIAGNOSIS 0.053083 1.054517
## LATERALITYnull 0.640261 1.896976
## LATERALITYr -0.102772 0.902333
## NPI 0.232808 1.262139
## ER_IHCpos -0.226524 0.797301
## INFERRED_MENOPAUSAL_STATEpre 0.514892 1.673458
## BREAST_SURGERYMASTECTOMY 0.249966 1.283982
## BREAST_SURGERYnull 0.401878 1.494629
## CELLULARITYlow 0.110008 1.116287
## CELLULARITYmoderate 0.037290 1.037994
## CELLULARITYnull 0.011039 1.011100
## HER2_SNP6LOSS 0.062863 1.064881
## HER2_SNP6NEUT 0.015280 1.015397
## HER2_SNP6UNDEF -0.196452 0.821640
## THREEGENEER+/HER2- High Prolif 0.018773 1.018951
## THREEGENEER+/HER2- Low Prolif -0.043477 0.957455
## THREEGENEHER2+ -0.408924 0.664365
## THREEGENEnull -0.001178 0.998822
## CLAUDIN_SUBTYPEclaudin-low -0.195379 0.822523
## CLAUDIN_SUBTYPEHer2 -0.049823 0.951397
## CLAUDIN_SUBTYPELumA -0.156718 0.854945
## CLAUDIN_SUBTYPELumB -0.013972 0.986125
## CLAUDIN_SUBTYPENC 0.123037 1.130926
## CLAUDIN_SUBTYPENormal 0.063251 1.065295
## CHEMOTHERAPYYES 0.393407 1.482022
## HORMONE_THERAPYYES -0.057645 0.943985
## RADIO_THERAPYYES -0.080586 0.922576
## HISTOLOGICAL_SUBTYPEDCIS -6.749872 0.001171
## HISTOLOGICAL_SUBTYPEIDC 0.120307 1.127843
## HISTOLOGICAL_SUBTYPEIDC-MED -0.328760 0.719816
## HISTOLOGICAL_SUBTYPEIDC-MUC 0.188385 1.207298
## HISTOLOGICAL_SUBTYPEIDC-TUB -0.117070 0.889523
## HISTOLOGICAL_SUBTYPEIDC+ILC 0.261433 1.298790
## HISTOLOGICAL_SUBTYPEILC 0.381559 1.464566
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR -0.587939 0.555471
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE -1.105652 0.330995
## HISTOLOGICAL_SUBTYPEnull -0.031327 0.969159
## HISTOLOGICAL_SUBTYPEOTHER -0.427040 0.652437
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE NA NA
## HISTOLOGICAL_SUBTYPEPHYL NA NA
## se(coef) z
## INTCLUST10 0.192484 -2.079
## INTCLUST2 0.190409 0.484
## INTCLUST3 0.160485 -1.084
## INTCLUST4ER- 0.229561 -1.007
## INTCLUST4ER+ 0.166020 -0.773
## INTCLUST5 0.211542 2.608
## INTCLUST6 0.179034 0.096
## INTCLUST7 0.164340 -1.183
## INTCLUST8 0.152720 -0.538
## INTCLUST9 0.161633 0.363
## COHORT 0.028394 1.269
## AGE_AT_DIAGNOSIS 0.003975 13.353
## LATERALITYnull 0.133339 4.802
## LATERALITYr 0.064413 -1.596
## NPI 0.035971 6.472
## ER_IHCpos 0.139413 -1.625
## INFERRED_MENOPAUSAL_STATEpre 0.122973 4.187
## BREAST_SURGERYMASTECTOMY 0.081581 3.064
## BREAST_SURGERYnull 0.314965 1.276
## CELLULARITYlow 0.115241 0.955
## CELLULARITYmoderate 0.069232 0.539
## CELLULARITYnull 0.210071 0.053
## HER2_SNP6LOSS 0.171567 0.366
## HER2_SNP6NEUT 0.104886 0.146
## HER2_SNP6UNDEF 0.594983 -0.330
## THREEGENEER+/HER2- High Prolif 0.160394 0.117
## THREEGENEER+/HER2- Low Prolif 0.163353 -0.266
## THREEGENEHER2+ 0.217989 -1.876
## THREEGENEnull 0.153684 -0.008
## CLAUDIN_SUBTYPEclaudin-low 0.159251 -1.227
## CLAUDIN_SUBTYPEHer2 0.160328 -0.311
## CLAUDIN_SUBTYPELumA 0.171453 -0.914
## CLAUDIN_SUBTYPELumB 0.171688 -0.081
## CLAUDIN_SUBTYPENC 0.480743 0.256
## CLAUDIN_SUBTYPENormal 0.191390 0.330
## CHEMOTHERAPYYES 0.113076 3.479
## HORMONE_THERAPYYES 0.077550 -0.743
## RADIO_THERAPYYES 0.082288 -0.979
## HISTOLOGICAL_SUBTYPEDCIS 718.427982 -0.009
## HISTOLOGICAL_SUBTYPEIDC 0.583952 0.206
## HISTOLOGICAL_SUBTYPEIDC-MED 0.647502 -0.508
## HISTOLOGICAL_SUBTYPEIDC-MUC 0.626245 0.301
## HISTOLOGICAL_SUBTYPEIDC-TUB 0.615707 -0.190
## HISTOLOGICAL_SUBTYPEIDC+ILC 0.600519 0.435
## HISTOLOGICAL_SUBTYPEILC 0.591833 0.645
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR 0.774588 -0.759
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE 1.162556 -0.951
## HISTOLOGICAL_SUBTYPEnull 1.167941 -0.027
## HISTOLOGICAL_SUBTYPEOTHER 0.771826 -0.553
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE 0.000000 NA
## HISTOLOGICAL_SUBTYPEPHYL 0.000000 NA
## Pr(>|z|)
## INTCLUST10 0.037623 *
## INTCLUST2 0.628619
## INTCLUST3 0.278189
## INTCLUST4ER- 0.314083
## INTCLUST4ER+ 0.439485
## INTCLUST5 0.009109 **
## INTCLUST6 0.923468
## INTCLUST7 0.236628
## INTCLUST8 0.590755
## INTCLUST9 0.716918
## COHORT 0.204608
## AGE_AT_DIAGNOSIS < 2e-16 ***
## LATERALITYnull 1.57e-06 ***
## LATERALITYr 0.110597
## NPI 9.67e-11 ***
## ER_IHCpos 0.104197
## INFERRED_MENOPAUSAL_STATEpre 2.83e-05 ***
## BREAST_SURGERYMASTECTOMY 0.002184 **
## BREAST_SURGERYnull 0.201974
## CELLULARITYlow 0.339786
## CELLULARITYmoderate 0.590144
## CELLULARITYnull 0.958091
## HER2_SNP6LOSS 0.714062
## HER2_SNP6NEUT 0.884176
## HER2_SNP6UNDEF 0.741263
## THREEGENEER+/HER2- High Prolif 0.906824
## THREEGENEER+/HER2- Low Prolif 0.790123
## THREEGENEHER2+ 0.060670 .
## THREEGENEnull 0.993882
## CLAUDIN_SUBTYPEclaudin-low 0.219875
## CLAUDIN_SUBTYPEHer2 0.755984
## CLAUDIN_SUBTYPELumA 0.360687
## CLAUDIN_SUBTYPELumB 0.935139
## CLAUDIN_SUBTYPENC 0.798005
## CLAUDIN_SUBTYPENormal 0.741034
## CHEMOTHERAPYYES 0.000503 ***
## HORMONE_THERAPYYES 0.457285
## RADIO_THERAPYYES 0.327425
## HISTOLOGICAL_SUBTYPEDCIS 0.992504
## HISTOLOGICAL_SUBTYPEIDC 0.836774
## HISTOLOGICAL_SUBTYPEIDC-MED 0.611638
## HISTOLOGICAL_SUBTYPEIDC-MUC 0.763555
## HISTOLOGICAL_SUBTYPEIDC-TUB 0.849200
## HISTOLOGICAL_SUBTYPEIDC+ILC 0.663312
## HISTOLOGICAL_SUBTYPEILC 0.519117
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR 0.447831
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE 0.341577
## HISTOLOGICAL_SUBTYPEnull 0.978601
## HISTOLOGICAL_SUBTYPEOTHER 0.580068
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE NA
## HISTOLOGICAL_SUBTYPEPHYL NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef)
## INTCLUST10 0.670211 1.4921
## INTCLUST2 1.096469 0.9120
## INTCLUST3 0.840272 1.1901
## INTCLUST4ER- 0.793663 1.2600
## INTCLUST4ER+ 0.879551 1.1369
## INTCLUST5 1.736175 0.5760
## INTCLUST6 1.017348 0.9829
## INTCLUST7 0.823255 1.2147
## INTCLUST8 0.921158 1.0856
## INTCLUST9 1.060356 0.9431
## COHORT 1.036676 0.9646
## AGE_AT_DIAGNOSIS 1.054517 0.9483
## LATERALITYnull 1.896976 0.5272
## LATERALITYr 0.902333 1.1082
## NPI 1.262139 0.7923
## ER_IHCpos 0.797301 1.2542
## INFERRED_MENOPAUSAL_STATEpre 1.673458 0.5976
## BREAST_SURGERYMASTECTOMY 1.283982 0.7788
## BREAST_SURGERYnull 1.494629 0.6691
## CELLULARITYlow 1.116287 0.8958
## CELLULARITYmoderate 1.037994 0.9634
## CELLULARITYnull 1.011100 0.9890
## HER2_SNP6LOSS 1.064881 0.9391
## HER2_SNP6NEUT 1.015397 0.9848
## HER2_SNP6UNDEF 0.821640 1.2171
## THREEGENEER+/HER2- High Prolif 1.018951 0.9814
## THREEGENEER+/HER2- Low Prolif 0.957455 1.0444
## THREEGENEHER2+ 0.664365 1.5052
## THREEGENEnull 0.998822 1.0012
## CLAUDIN_SUBTYPEclaudin-low 0.822523 1.2158
## CLAUDIN_SUBTYPEHer2 0.951397 1.0511
## CLAUDIN_SUBTYPELumA 0.854945 1.1697
## CLAUDIN_SUBTYPELumB 0.986125 1.0141
## CLAUDIN_SUBTYPENC 1.130926 0.8842
## CLAUDIN_SUBTYPENormal 1.065295 0.9387
## CHEMOTHERAPYYES 1.482022 0.6748
## HORMONE_THERAPYYES 0.943985 1.0593
## RADIO_THERAPYYES 0.922576 1.0839
## HISTOLOGICAL_SUBTYPEDCIS 0.001171 853.9499
## HISTOLOGICAL_SUBTYPEIDC 1.127843 0.8866
## HISTOLOGICAL_SUBTYPEIDC-MED 0.719816 1.3892
## HISTOLOGICAL_SUBTYPEIDC-MUC 1.207298 0.8283
## HISTOLOGICAL_SUBTYPEIDC-TUB 0.889523 1.1242
## HISTOLOGICAL_SUBTYPEIDC+ILC 1.298790 0.7699
## HISTOLOGICAL_SUBTYPEILC 1.464566 0.6828
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR 0.555471 1.8003
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE 0.330995 3.0212
## HISTOLOGICAL_SUBTYPEnull 0.969159 1.0318
## HISTOLOGICAL_SUBTYPEOTHER 0.652437 1.5327
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE NA NA
## HISTOLOGICAL_SUBTYPEPHYL NA NA
## lower .95 upper .95
## INTCLUST10 0.45959 0.9774
## INTCLUST2 0.75495 1.5925
## INTCLUST3 0.61350 1.1509
## INTCLUST4ER- 0.50610 1.2446
## INTCLUST4ER+ 0.63525 1.2178
## INTCLUST5 1.14691 2.6282
## INTCLUST6 0.71627 1.4450
## INTCLUST7 0.59655 1.1361
## INTCLUST8 0.68287 1.2426
## INTCLUST9 0.77245 1.4556
## COHORT 0.98056 1.0960
## AGE_AT_DIAGNOSIS 1.04633 1.0628
## LATERALITYnull 1.46071 2.4635
## LATERALITYr 0.79531 1.0238
## NPI 1.17622 1.3543
## ER_IHCpos 0.60667 1.0478
## INFERRED_MENOPAUSAL_STATEpre 1.31504 2.1296
## BREAST_SURGERYMASTECTOMY 1.09425 1.5066
## BREAST_SURGERYnull 0.80619 2.7710
## CELLULARITYlow 0.89060 1.3992
## CELLULARITYmoderate 0.90628 1.1888
## CELLULARITYnull 0.66986 1.5262
## HER2_SNP6LOSS 0.76079 1.4905
## HER2_SNP6NEUT 0.82672 1.2471
## HER2_SNP6UNDEF 0.25599 2.6371
## THREEGENEER+/HER2- High Prolif 0.74409 1.3953
## THREEGENEER+/HER2- Low Prolif 0.69514 1.3188
## THREEGENEHER2+ 0.43337 1.0185
## THREEGENEnull 0.73905 1.3499
## CLAUDIN_SUBTYPEclaudin-low 0.60199 1.1238
## CLAUDIN_SUBTYPEHer2 0.69485 1.3027
## CLAUDIN_SUBTYPELumA 0.61094 1.1964
## CLAUDIN_SUBTYPELumB 0.70435 1.3806
## CLAUDIN_SUBTYPENC 0.44078 2.9016
## CLAUDIN_SUBTYPENormal 0.73208 1.5502
## CHEMOTHERAPYYES 1.18742 1.8497
## HORMONE_THERAPYYES 0.81088 1.0989
## RADIO_THERAPYYES 0.78516 1.0840
## HISTOLOGICAL_SUBTYPEDCIS 0.00000 Inf
## HISTOLOGICAL_SUBTYPEIDC 0.35908 3.5425
## HISTOLOGICAL_SUBTYPEIDC-MED 0.20233 2.5608
## HISTOLOGICAL_SUBTYPEIDC-MUC 0.35380 4.1198
## HISTOLOGICAL_SUBTYPEIDC-TUB 0.26611 2.9734
## HISTOLOGICAL_SUBTYPEIDC+ILC 0.40029 4.2141
## HISTOLOGICAL_SUBTYPEILC 0.45913 4.6717
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR 0.12171 2.5351
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE 0.03390 3.2314
## HISTOLOGICAL_SUBTYPEnull 0.09823 9.5620
## HISTOLOGICAL_SUBTYPEOTHER 0.14373 2.9615
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE NA NA
## HISTOLOGICAL_SUBTYPEPHYL NA NA
##
## Concordance= 0.684 (se = 0.008 )
## Rsquare= 0.223 (max possible= 1 )
## Likelihood ratio test= 472.6 on 49 df, p=<2e-16
## Wald test = 463.8 on 49 df, p=<2e-16
## Score (logrank) test = 484.3 on 49 df, p=<2e-16
# Survival curve derived from the fitted Cox model (all patients, no strata),
# drawn with ggfortify's autoplot().
cox1 <- survfit(coxclinical)
autoplot(cox1)
This flags a few items as being significant to survival such as INTCLUST10, INTCLUST5, Age at diagnosis, Laterality being null, NPI, Inferred Menopausal State, Breast surgery type, and Chemotherapy being performed.
Next, survival curves are made for some of the covariates indicated as significant
# Kaplan-Meier curves stratified by whether chemotherapy was given.
km_chemotherapy <- survfit(
  Surv(OS_MONTHS, OS_STATUS) ~ CHEMOTHERAPY,
  data = clinical
)
autoplot(km_chemotherapy)
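Oddly enough, this shows that those who did not get chemotherapy have a better survival curve than those who did. This may reflect confounding by indication rather than a harmful effect of treatment: patients with more advanced disease are more likely to be given chemotherapy in the first place.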
# Kaplan-Meier curves stratified by inferred menopausal state (pre vs. post).
km_menopause <- survfit(
  Surv(OS_MONTHS, OS_STATUS) ~ INFERRED_MENOPAUSAL_STATE,
  data = clinical
)
autoplot(km_menopause)
This shows that those who are in a pre-menopausal state have a better survival curve than those who are in a post-menopausal state. This probably ties directly into our other variable, age at diagnosis. For this, we will use the mean of 61 to create a pre- and post-61 variable to see what the survival curves look like for each group.
# Split patients at age 61 (roughly the mean age at diagnosis) and compare
# Kaplan-Meier curves for the two groups.
# The new column is added with base R rather than mutate(): no visible
# library() call attaches dplyr, so mutate() may not be found at run time.
# Patients aged exactly 61 fall in "Over61"; a missing age gives NA.
clinical1 <- clinical
clinical1$AG <- ifelse(
  clinical1$AGE_AT_DIAGNOSIS < 61, "Under61", "Over61"
)
autoplot(survfit(Surv(OS_MONTHS, OS_STATUS) ~ AG, data = clinical1))
This is in agreement with our menopause variable, women who are under 61 tend to have a better survival curve than those who are over 61.
# Kaplan-Meier curves stratified by laterality (l, r, or null).
km_laterality <- survfit(
  Surv(OS_MONTHS, OS_STATUS) ~ LATERALITY,
  data = clinical
)
autoplot(km_laterality)
This actually shows us that which breast the cancer develops in does not appear to significantly change the survival curve. However, the null response seems to be the reason why our cox model shows it was significant. It would be interesting to know if these nulls were that of a cancer spread that didn’t have a known origin location, or just insufficient data.
# Kaplan-Meier curves with one stratum per integrative cluster.
km_intclust <- survfit(
  Surv(OS_MONTHS, OS_STATUS) ~ INTCLUST,
  data = clinical
)
autoplot(km_intclust)
This shows the many options for the INTCLUST variable and corresponding survival curves. It is difficult to see many of the different options, but from our cox model we were told that 10 and 5 were significant. 10 appears to have a higher survival rate than most, where 5 looks like it may be the lower end curve, but the colors make it hard to distinguish, so we will look at them both vs. the overall survival rate for the rest.
# Head-to-head Kaplan-Meier comparison restricted to clusters 5 and 10.
clusters_5_and_10 <- subset(clinical, INTCLUST %in% c(5, 10))
km_intclust_5_vs_10 <- survfit(
  Surv(OS_MONTHS, OS_STATUS) ~ INTCLUST,
  data = clusters_5_and_10
)
autoplot(km_intclust_5_vs_10)
When we compare INTCLUST 5 vs. INTCLUST 10, we see a substantial difference in survival curves. It appears that INTCLUST 10 patients have a much better survival rate than INTCLUST 5 patients.
# Kaplan-Meier curves: cluster 5 patients versus everyone else.
km_cluster5_vs_rest <- survfit(
  Surv(OS_MONTHS, OS_STATUS) ~ INTCLUST == 5,
  data = clinical
)
autoplot(km_cluster5_vs_rest)
This shows when we compare patients that are positive for integrative cluster 5, their overall survival rate is much lower than the rest of the patients.
# Kaplan-Meier curves: cluster 10 patients versus everyone else.
km_cluster10_vs_rest <- survfit(
  Surv(OS_MONTHS, OS_STATUS) ~ INTCLUST == 10,
  data = clinical
)
autoplot(km_cluster10_vs_rest)
This shows that if a patient is positive for integrative cluster 10, their overall survival rate at first is slightly lower than the rest, but after a certain amount of time the survival rate is better than average.
# Bin NPI into four groups at its sample quartiles (missing values ignored
# when computing the cut points), then plot Kaplan-Meier curves per bin.
# include.lowest = TRUE keeps the minimum NPI value inside the first bin.
npi_quartile_breaks <- quantile(
  clinical$NPI,
  probs = seq(0, 1, by = 0.25),
  na.rm = TRUE
)
clinical$QNPI <- cut(
  clinical$NPI,
  breaks = npi_quartile_breaks,
  include.lowest = TRUE
)
km_npi_quartile <- survfit(Surv(OS_MONTHS, OS_STATUS) ~ QNPI, data = clinical)
autoplot(km_npi_quartile)
By breaking NPI into quartiles, we can see that, generally, as you move from Q1 towards Q4 the survival curve decreases for each quartile. This makes sense because the NPI score is calculated from tumor size, number of involved lymph nodes, and grade of the tumor. Higher values of NPI seem to correspond to more advanced cancer status.
Since the survival rate is much lower with those who have INTCLUST 5, I decided to subset those individuals and run a cox model on them to see if any of the covariates have a significant impact on those patients.
# Refit the Cox model on integrative cluster 5 patients only; INTCLUST is
# left out of the formula because it is constant within this subset.
# NOTE(review): the subset keeps every factor level of the full table, and
# levels with no rows here appear as NA coefficient rows in the summary;
# consider droplevels() if a cleaner table is wanted.
L5 <- clinical[which(clinical$INTCLUST == 5), ]
coxclinicalL5 <- coxph(
  Surv(OS_MONTHS, OS_STATUS) ~
    COHORT + AGE_AT_DIAGNOSIS + NPI + ER_IHC + INFERRED_MENOPAUSAL_STATE +
    BREAST_SURGERY + CELLULARITY + HER2_SNP6 + THREEGENE + CLAUDIN_SUBTYPE +
    CHEMOTHERAPY + HORMONE_THERAPY + RADIO_THERAPY + HISTOLOGICAL_SUBTYPE,
  data = L5
)
summary(coxclinicalL5)
## Call:
## coxph(formula = Surv(OS_MONTHS, OS_STATUS) ~ COHORT + AGE_AT_DIAGNOSIS +
## NPI + ER_IHC + INFERRED_MENOPAUSAL_STATE + BREAST_SURGERY +
## CELLULARITY + HER2_SNP6 + THREEGENE + CLAUDIN_SUBTYPE + CHEMOTHERAPY +
## HORMONE_THERAPY + RADIO_THERAPY + HISTOLOGICAL_SUBTYPE, data = L5)
##
## n= 180, number of events= 123
## (4 observations deleted due to missingness)
##
## coef exp(coef)
## COHORT -0.061607 0.940252
## AGE_AT_DIAGNOSIS 0.005041 1.005053
## NPI 0.492253 1.635998
## ER_IHCpos -0.057880 0.943763
## INFERRED_MENOPAUSAL_STATEpre -0.158717 0.853238
## BREAST_SURGERYMASTECTOMY -0.090428 0.913540
## BREAST_SURGERYnull -0.623352 0.536144
## CELLULARITYlow -0.077823 0.925129
## CELLULARITYmoderate 0.172181 1.187893
## CELLULARITYnull -0.163305 0.849332
## HER2_SNP6LOSS NA NA
## HER2_SNP6NEUT NA NA
## HER2_SNP6UNDEF 0.192964 1.212839
## THREEGENEER+/HER2- High Prolif -1.454652 0.233482
## THREEGENEER+/HER2- Low Prolif NA NA
## THREEGENEHER2+ -1.301742 0.272057
## THREEGENEnull -0.971167 0.378641
## CLAUDIN_SUBTYPEclaudin-low 0.046830 1.047944
## CLAUDIN_SUBTYPEHer2 0.349554 1.418434
## CLAUDIN_SUBTYPELumA 0.937926 2.554676
## CLAUDIN_SUBTYPELumB 0.966508 2.628748
## CLAUDIN_SUBTYPENC NA NA
## CLAUDIN_SUBTYPENormal 1.448824 4.258105
## CHEMOTHERAPYYES -0.127841 0.879994
## HORMONE_THERAPYYES -0.905836 0.404204
## RADIO_THERAPYYES -0.068000 0.934260
## HISTOLOGICAL_SUBTYPEDCIS NA NA
## HISTOLOGICAL_SUBTYPEIDC -0.744260 0.475086
## HISTOLOGICAL_SUBTYPEIDC-MED 0.772374 2.164901
## HISTOLOGICAL_SUBTYPEIDC-MUC NA NA
## HISTOLOGICAL_SUBTYPEIDC-TUB -0.850790 0.427077
## HISTOLOGICAL_SUBTYPEIDC+ILC -0.896735 0.407899
## HISTOLOGICAL_SUBTYPEILC 0.315274 1.370635
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR NA NA
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE NA NA
## HISTOLOGICAL_SUBTYPEnull NA NA
## HISTOLOGICAL_SUBTYPEOTHER NA NA
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE NA NA
## HISTOLOGICAL_SUBTYPEPHYL NA NA
## se(coef) z Pr(>|z|)
## COHORT 0.096156 -0.641 0.521717
## AGE_AT_DIAGNOSIS 0.013242 0.381 0.703460
## NPI 0.147883 3.329 0.000873
## ER_IHCpos 0.358479 -0.161 0.871730
## INFERRED_MENOPAUSAL_STATEpre 0.334802 -0.474 0.635455
## BREAST_SURGERYMASTECTOMY 0.268971 -0.336 0.736720
## BREAST_SURGERYnull 1.109507 -0.562 0.574233
## CELLULARITYlow 0.407842 -0.191 0.848670
## CELLULARITYmoderate 0.239488 0.719 0.472169
## CELLULARITYnull 0.558759 -0.292 0.770085
## HER2_SNP6LOSS 0.000000 NA NA
## HER2_SNP6NEUT 0.000000 NA NA
## HER2_SNP6UNDEF 1.327718 0.145 0.884447
## THREEGENEER+/HER2- High Prolif 0.823217 -1.767 0.077223
## THREEGENEER+/HER2- Low Prolif 0.000000 NA NA
## THREEGENEHER2+ 0.691307 -1.883 0.059698
## THREEGENEnull 0.777439 -1.249 0.211596
## CLAUDIN_SUBTYPEclaudin-low 0.653612 0.072 0.942882
## CLAUDIN_SUBTYPEHer2 0.443426 0.788 0.430521
## CLAUDIN_SUBTYPELumA 0.606809 1.546 0.122184
## CLAUDIN_SUBTYPELumB 0.561971 1.720 0.085459
## CLAUDIN_SUBTYPENC 0.000000 NA NA
## CLAUDIN_SUBTYPENormal 0.608385 2.381 0.017246
## CHEMOTHERAPYYES 0.339974 -0.376 0.706894
## HORMONE_THERAPYYES 0.280192 -3.233 0.001225
## RADIO_THERAPYYES 0.262746 -0.259 0.795784
## HISTOLOGICAL_SUBTYPEDCIS 0.000000 NA NA
## HISTOLOGICAL_SUBTYPEIDC 0.785944 -0.947 0.343657
## HISTOLOGICAL_SUBTYPEIDC-MED 1.371640 0.563 0.573365
## HISTOLOGICAL_SUBTYPEIDC-MUC 0.000000 NA NA
## HISTOLOGICAL_SUBTYPEIDC-TUB 1.334154 -0.638 0.523669
## HISTOLOGICAL_SUBTYPEIDC+ILC 1.318650 -0.680 0.496479
## HISTOLOGICAL_SUBTYPEILC 1.098859 0.287 0.774181
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR 0.000000 NA NA
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE 0.000000 NA NA
## HISTOLOGICAL_SUBTYPEnull 0.000000 NA NA
## HISTOLOGICAL_SUBTYPEOTHER 0.000000 NA NA
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE 0.000000 NA NA
## HISTOLOGICAL_SUBTYPEPHYL 0.000000 NA NA
##
## COHORT
## AGE_AT_DIAGNOSIS
## NPI ***
## ER_IHCpos
## INFERRED_MENOPAUSAL_STATEpre
## BREAST_SURGERYMASTECTOMY
## BREAST_SURGERYnull
## CELLULARITYlow
## CELLULARITYmoderate
## CELLULARITYnull
## HER2_SNP6LOSS
## HER2_SNP6NEUT
## HER2_SNP6UNDEF
## THREEGENEER+/HER2- High Prolif .
## THREEGENEER+/HER2- Low Prolif
## THREEGENEHER2+ .
## THREEGENEnull
## CLAUDIN_SUBTYPEclaudin-low
## CLAUDIN_SUBTYPEHer2
## CLAUDIN_SUBTYPELumA
## CLAUDIN_SUBTYPELumB .
## CLAUDIN_SUBTYPENC
## CLAUDIN_SUBTYPENormal *
## CHEMOTHERAPYYES
## HORMONE_THERAPYYES **
## RADIO_THERAPYYES
## HISTOLOGICAL_SUBTYPEDCIS
## HISTOLOGICAL_SUBTYPEIDC
## HISTOLOGICAL_SUBTYPEIDC-MED
## HISTOLOGICAL_SUBTYPEIDC-MUC
## HISTOLOGICAL_SUBTYPEIDC-TUB
## HISTOLOGICAL_SUBTYPEIDC+ILC
## HISTOLOGICAL_SUBTYPEILC
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE
## HISTOLOGICAL_SUBTYPEnull
## HISTOLOGICAL_SUBTYPEOTHER
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE
## HISTOLOGICAL_SUBTYPEPHYL
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef)
## COHORT 0.9403 1.0635
## AGE_AT_DIAGNOSIS 1.0051 0.9950
## NPI 1.6360 0.6112
## ER_IHCpos 0.9438 1.0596
## INFERRED_MENOPAUSAL_STATEpre 0.8532 1.1720
## BREAST_SURGERYMASTECTOMY 0.9135 1.0946
## BREAST_SURGERYnull 0.5361 1.8652
## CELLULARITYlow 0.9251 1.0809
## CELLULARITYmoderate 1.1879 0.8418
## CELLULARITYnull 0.8493 1.1774
## HER2_SNP6LOSS NA NA
## HER2_SNP6NEUT NA NA
## HER2_SNP6UNDEF 1.2128 0.8245
## THREEGENEER+/HER2- High Prolif 0.2335 4.2830
## THREEGENEER+/HER2- Low Prolif NA NA
## THREEGENEHER2+ 0.2721 3.6757
## THREEGENEnull 0.3786 2.6410
## CLAUDIN_SUBTYPEclaudin-low 1.0479 0.9542
## CLAUDIN_SUBTYPEHer2 1.4184 0.7050
## CLAUDIN_SUBTYPELumA 2.5547 0.3914
## CLAUDIN_SUBTYPELumB 2.6287 0.3804
## CLAUDIN_SUBTYPENC NA NA
## CLAUDIN_SUBTYPENormal 4.2581 0.2348
## CHEMOTHERAPYYES 0.8800 1.1364
## HORMONE_THERAPYYES 0.4042 2.4740
## RADIO_THERAPYYES 0.9343 1.0704
## HISTOLOGICAL_SUBTYPEDCIS NA NA
## HISTOLOGICAL_SUBTYPEIDC 0.4751 2.1049
## HISTOLOGICAL_SUBTYPEIDC-MED 2.1649 0.4619
## HISTOLOGICAL_SUBTYPEIDC-MUC NA NA
## HISTOLOGICAL_SUBTYPEIDC-TUB 0.4271 2.3415
## HISTOLOGICAL_SUBTYPEIDC+ILC 0.4079 2.4516
## HISTOLOGICAL_SUBTYPEILC 1.3706 0.7296
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR NA NA
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE NA NA
## HISTOLOGICAL_SUBTYPEnull NA NA
## HISTOLOGICAL_SUBTYPEOTHER NA NA
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE NA NA
## HISTOLOGICAL_SUBTYPEPHYL NA NA
## lower .95 upper .95
## COHORT 0.77875 1.135
## AGE_AT_DIAGNOSIS 0.97930 1.031
## NPI 1.22435 2.186
## ER_IHCpos 0.46744 1.905
## INFERRED_MENOPAUSAL_STATEpre 0.44268 1.645
## BREAST_SURGERYMASTECTOMY 0.53924 1.548
## BREAST_SURGERYnull 0.06094 4.717
## CELLULARITYlow 0.41595 2.058
## CELLULARITYmoderate 0.74289 1.899
## CELLULARITYnull 0.28409 2.539
## HER2_SNP6LOSS NA NA
## HER2_SNP6NEUT NA NA
## HER2_SNP6UNDEF 0.08988 16.367
## THREEGENEER+/HER2- High Prolif 0.04651 1.172
## THREEGENEER+/HER2- Low Prolif NA NA
## THREEGENEHER2+ 0.07018 1.055
## THREEGENEnull 0.08250 1.738
## CLAUDIN_SUBTYPEclaudin-low 0.29106 3.773
## CLAUDIN_SUBTYPEHer2 0.59479 3.383
## CLAUDIN_SUBTYPELumA 0.77771 8.392
## CLAUDIN_SUBTYPELumB 0.87377 7.909
## CLAUDIN_SUBTYPENC NA NA
## CLAUDIN_SUBTYPENormal 1.29228 14.031
## CHEMOTHERAPYYES 0.45195 1.713
## HORMONE_THERAPYYES 0.23340 0.700
## RADIO_THERAPYYES 0.55824 1.564
## HISTOLOGICAL_SUBTYPEDCIS NA NA
## HISTOLOGICAL_SUBTYPEIDC 0.10181 2.217
## HISTOLOGICAL_SUBTYPEIDC-MED 0.14720 31.841
## HISTOLOGICAL_SUBTYPEIDC-MUC NA NA
## HISTOLOGICAL_SUBTYPEIDC-TUB 0.03125 5.836
## HISTOLOGICAL_SUBTYPEIDC+ILC 0.03077 5.407
## HISTOLOGICAL_SUBTYPEILC 0.15906 11.811
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR NA NA
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE NA NA
## HISTOLOGICAL_SUBTYPEnull NA NA
## HISTOLOGICAL_SUBTYPEOTHER NA NA
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE NA NA
## HISTOLOGICAL_SUBTYPEPHYL NA NA
##
## Concordance= 0.665 (se = 0.026 )
## Rsquare= 0.207 (max possible= 0.998 )
## Likelihood ratio test= 41.64 on 27 df, p=0.04
## Wald test = 40.98 on 27 df, p=0.04
## Score (logrank) test = 45.51 on 27 df, p=0.01
This shows that NPI, claudin subtype "Normal", and having undergone hormone therapy are all significant covariates for patients in the INTCLUST 5 group.
# Within cluster 5: claudin subtype "Normal" versus all other subtypes.
km_l5_claudin_normal <- survfit(
  Surv(OS_MONTHS, OS_STATUS) ~ CLAUDIN_SUBTYPE == "Normal",
  data = L5
)
autoplot(km_l5_claudin_normal)
This seems to show that, among INTCLUST 5 patients, those with claudin subtype "Normal" have a lower survival rate than those with the other claudin subtypes. However, this subgroup has a very small sample size, so this finding may not hold up without more samples.
# Within cluster 5: Kaplan-Meier curves by hormone therapy (YES vs. NO).
km_l5_hormone_therapy <- survfit(
  Surv(OS_MONTHS, OS_STATUS) ~ HORMONE_THERAPY,
  data = L5
)
autoplot(km_l5_hormone_therapy)
It is interesting that hormone therapy was not shown to be significant for breast cancer survival in our overall model. However, when looking only at INTCLUST 5 patients, those who had hormone therapy had a slightly better survival rate than those who did not.
# Aalen additive regression on the covariates flagged by the full Cox model;
# the plot draws one panel of coefficient-over-time curves per term, with the
# legend hidden.
aaregclinical <- aareg(
  Surv(OS_MONTHS, OS_STATUS) ~
    INTCLUST + LATERALITY + NPI + INFERRED_MENOPAUSAL_STATE +
    BREAST_SURGERY + CELLULARITY + CHEMOTHERAPY,
  data = clinical
)
autoplot(aaregclinical) + theme(legend.position = "none")
This shows us how the covariates change over time. It is interesting to see some of the curves of Breast Surgery = mastectomy and a patient having chemotherapy.
# Refit the Cox model on integrative cluster 10 patients only; INTCLUST is
# left out of the formula because it is constant within this subset.
# NOTE(review): the subset keeps every factor level of the full table, and
# levels with no rows here appear as NA coefficient rows in the summary;
# consider droplevels() if a cleaner table is wanted.
L10 <- clinical[which(clinical$INTCLUST == 10), ]
coxclinicalL10 <- coxph(
  Surv(OS_MONTHS, OS_STATUS) ~
    COHORT + AGE_AT_DIAGNOSIS + NPI + ER_IHC + INFERRED_MENOPAUSAL_STATE +
    BREAST_SURGERY + CELLULARITY + HER2_SNP6 + THREEGENE + CLAUDIN_SUBTYPE +
    CHEMOTHERAPY + HORMONE_THERAPY + RADIO_THERAPY + HISTOLOGICAL_SUBTYPE,
  data = L10
)
## Warning in fitter(X, Y, strats, offset, init, control, weights = weights, :
## Loglik converged before variable 23,28,29 ; beta may be infinite.
summary(coxclinicalL10)
## Call:
## coxph(formula = Surv(OS_MONTHS, OS_STATUS) ~ COHORT + AGE_AT_DIAGNOSIS +
## NPI + ER_IHC + INFERRED_MENOPAUSAL_STATE + BREAST_SURGERY +
## CELLULARITY + HER2_SNP6 + THREEGENE + CLAUDIN_SUBTYPE + CHEMOTHERAPY +
## HORMONE_THERAPY + RADIO_THERAPY + HISTOLOGICAL_SUBTYPE, data = L10)
##
## n= 218, number of events= 104
## (1 observation deleted due to missingness)
##
## coef exp(coef)
## COHORT 6.002e-02 1.062e+00
## AGE_AT_DIAGNOSIS 3.220e-02 1.033e+00
## NPI 1.689e-01 1.184e+00
## ER_IHCpos -5.056e-01 6.031e-01
## INFERRED_MENOPAUSAL_STATEpre 2.510e-01 1.285e+00
## BREAST_SURGERYMASTECTOMY -6.840e-02 9.339e-01
## BREAST_SURGERYnull 1.648e+00 5.198e+00
## CELLULARITYlow 7.263e-01 2.067e+00
## CELLULARITYmoderate 1.988e-01 1.220e+00
## CELLULARITYnull -2.438e-01 7.836e-01
## HER2_SNP6LOSS -6.661e-02 9.356e-01
## HER2_SNP6NEUT -2.661e-02 9.737e-01
## HER2_SNP6UNDEF NA NA
## THREEGENEER+/HER2- High Prolif 3.809e+00 4.509e+01
## THREEGENEER+/HER2- Low Prolif NA NA
## THREEGENEHER2+ -3.564e-01 7.002e-01
## THREEGENEnull 8.386e-01 2.313e+00
## CLAUDIN_SUBTYPEclaudin-low -3.036e-01 7.381e-01
## CLAUDIN_SUBTYPEHer2 -1.127e-01 8.934e-01
## CLAUDIN_SUBTYPELumA -1.569e+00 2.082e-01
## CLAUDIN_SUBTYPELumB -3.574e+00 2.803e-02
## CLAUDIN_SUBTYPENC NA NA
## CLAUDIN_SUBTYPENormal -1.742e+01 2.707e-08
## CHEMOTHERAPYYES 3.933e-01 1.482e+00
## HORMONE_THERAPYYES 3.405e-01 1.406e+00
## RADIO_THERAPYYES -2.716e-01 7.622e-01
## HISTOLOGICAL_SUBTYPEDCIS NA NA
## HISTOLOGICAL_SUBTYPEIDC 1.773e+01 4.991e+07
## HISTOLOGICAL_SUBTYPEIDC-MED 1.672e+01 1.817e+07
## HISTOLOGICAL_SUBTYPEIDC-MUC NA NA
## HISTOLOGICAL_SUBTYPEIDC-TUB -5.160e-01 5.969e-01
## HISTOLOGICAL_SUBTYPEIDC+ILC NA NA
## HISTOLOGICAL_SUBTYPEILC -2.873e-01 7.503e-01
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR NA NA
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE NA NA
## HISTOLOGICAL_SUBTYPEnull NA NA
## HISTOLOGICAL_SUBTYPEOTHER NA NA
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE NA NA
## HISTOLOGICAL_SUBTYPEPHYL NA NA
## se(coef) z
## COHORT 9.889e-02 0.607
## AGE_AT_DIAGNOSIS 1.431e-02 2.251
## NPI 1.949e-01 0.867
## ER_IHCpos 4.660e-01 -1.085
## INFERRED_MENOPAUSAL_STATEpre 3.783e-01 0.663
## BREAST_SURGERYMASTECTOMY 2.316e-01 -0.295
## BREAST_SURGERYnull 5.606e-01 2.940
## CELLULARITYlow 3.398e-01 2.138
## CELLULARITYmoderate 2.705e-01 0.735
## CELLULARITYnull 1.035e+00 -0.236
## HER2_SNP6LOSS 5.813e-01 -0.115
## HER2_SNP6NEUT 3.791e-01 -0.070
## HER2_SNP6UNDEF 0.000e+00 NA
## THREEGENEER+/HER2- High Prolif 1.436e+00 2.652
## THREEGENEER+/HER2- Low Prolif 0.000e+00 NA
## THREEGENEHER2+ 1.110e+00 -0.321
## THREEGENEnull 2.613e-01 3.209
## CLAUDIN_SUBTYPEclaudin-low 2.712e-01 -1.120
## CLAUDIN_SUBTYPEHer2 6.246e-01 -0.180
## CLAUDIN_SUBTYPELumA 8.664e+03 0.000
## CLAUDIN_SUBTYPELumB 1.400e+00 -2.553
## CLAUDIN_SUBTYPENC 0.000e+00 NA
## CLAUDIN_SUBTYPENormal 8.223e+03 -0.002
## CHEMOTHERAPYYES 3.571e-01 1.101
## HORMONE_THERAPYYES 2.436e-01 1.398
## RADIO_THERAPYYES 2.803e-01 -0.969
## HISTOLOGICAL_SUBTYPEDCIS 0.000e+00 NA
## HISTOLOGICAL_SUBTYPEIDC 6.963e+03 0.003
## HISTOLOGICAL_SUBTYPEIDC-MED 6.963e+03 0.002
## HISTOLOGICAL_SUBTYPEIDC-MUC 0.000e+00 NA
## HISTOLOGICAL_SUBTYPEIDC-TUB 9.848e+03 0.000
## HISTOLOGICAL_SUBTYPEIDC+ILC 0.000e+00 NA
## HISTOLOGICAL_SUBTYPEILC 8.664e+03 0.000
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR 0.000e+00 NA
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE 0.000e+00 NA
## HISTOLOGICAL_SUBTYPEnull 0.000e+00 NA
## HISTOLOGICAL_SUBTYPEOTHER 0.000e+00 NA
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE 0.000e+00 NA
## HISTOLOGICAL_SUBTYPEPHYL 0.000e+00 NA
## Pr(>|z|)
## COHORT 0.54391
## AGE_AT_DIAGNOSIS 0.02441 *
## NPI 0.38607
## ER_IHCpos 0.27787
## INFERRED_MENOPAUSAL_STATEpre 0.50709
## BREAST_SURGERYMASTECTOMY 0.76778
## BREAST_SURGERYnull 0.00328 **
## CELLULARITYlow 0.03254 *
## CELLULARITYmoderate 0.46248
## CELLULARITYnull 0.81377
## HER2_SNP6LOSS 0.90876
## HER2_SNP6NEUT 0.94403
## HER2_SNP6UNDEF NA
## THREEGENEER+/HER2- High Prolif 0.00800 **
## THREEGENEER+/HER2- Low Prolif NA
## THREEGENEHER2+ 0.74810
## THREEGENEnull 0.00133 **
## CLAUDIN_SUBTYPEclaudin-low 0.26289
## CLAUDIN_SUBTYPEHer2 0.85684
## CLAUDIN_SUBTYPELumA 0.99986
## CLAUDIN_SUBTYPELumB 0.01067 *
## CLAUDIN_SUBTYPENC NA
## CLAUDIN_SUBTYPENormal 0.99831
## CHEMOTHERAPYYES 0.27073
## HORMONE_THERAPYYES 0.16220
## RADIO_THERAPYYES 0.33256
## HISTOLOGICAL_SUBTYPEDCIS NA
## HISTOLOGICAL_SUBTYPEIDC 0.99797
## HISTOLOGICAL_SUBTYPEIDC-MED 0.99808
## HISTOLOGICAL_SUBTYPEIDC-MUC NA
## HISTOLOGICAL_SUBTYPEIDC-TUB 0.99996
## HISTOLOGICAL_SUBTYPEIDC+ILC NA
## HISTOLOGICAL_SUBTYPEILC 0.99997
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR NA
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE NA
## HISTOLOGICAL_SUBTYPEnull NA
## HISTOLOGICAL_SUBTYPEOTHER NA
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE NA
## HISTOLOGICAL_SUBTYPEPHYL NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef)
## COHORT 1.062e+00 9.417e-01
## AGE_AT_DIAGNOSIS 1.033e+00 9.683e-01
## NPI 1.184e+00 8.446e-01
## ER_IHCpos 6.031e-01 1.658e+00
## INFERRED_MENOPAUSAL_STATEpre 1.285e+00 7.780e-01
## BREAST_SURGERYMASTECTOMY 9.339e-01 1.071e+00
## BREAST_SURGERYnull 5.198e+00 1.924e-01
## CELLULARITYlow 2.067e+00 4.837e-01
## CELLULARITYmoderate 1.220e+00 8.197e-01
## CELLULARITYnull 7.836e-01 1.276e+00
## HER2_SNP6LOSS 9.356e-01 1.069e+00
## HER2_SNP6NEUT 9.737e-01 1.027e+00
## HER2_SNP6UNDEF NA NA
## THREEGENEER+/HER2- High Prolif 4.509e+01 2.218e-02
## THREEGENEER+/HER2- Low Prolif NA NA
## THREEGENEHER2+ 7.002e-01 1.428e+00
## THREEGENEnull 2.313e+00 4.323e-01
## CLAUDIN_SUBTYPEclaudin-low 7.381e-01 1.355e+00
## CLAUDIN_SUBTYPEHer2 8.934e-01 1.119e+00
## CLAUDIN_SUBTYPELumA 2.082e-01 4.803e+00
## CLAUDIN_SUBTYPELumB 2.803e-02 3.568e+01
## CLAUDIN_SUBTYPENC NA NA
## CLAUDIN_SUBTYPENormal 2.707e-08 3.694e+07
## CHEMOTHERAPYYES 1.482e+00 6.748e-01
## HORMONE_THERAPYYES 1.406e+00 7.114e-01
## RADIO_THERAPYYES 7.622e-01 1.312e+00
## HISTOLOGICAL_SUBTYPEDCIS NA NA
## HISTOLOGICAL_SUBTYPEIDC 4.991e+07 2.004e-08
## HISTOLOGICAL_SUBTYPEIDC-MED 1.817e+07 5.504e-08
## HISTOLOGICAL_SUBTYPEIDC-MUC NA NA
## HISTOLOGICAL_SUBTYPEIDC-TUB 5.969e-01 1.675e+00
## HISTOLOGICAL_SUBTYPEIDC+ILC NA NA
## HISTOLOGICAL_SUBTYPEILC 7.503e-01 1.333e+00
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR NA NA
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE NA NA
## HISTOLOGICAL_SUBTYPEnull NA NA
## HISTOLOGICAL_SUBTYPEOTHER NA NA
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE NA NA
## HISTOLOGICAL_SUBTYPEPHYL NA NA
## lower .95 upper .95
## COHORT 0.874763 1.2890
## AGE_AT_DIAGNOSIS 1.004167 1.0621
## NPI 0.808123 1.7348
## ER_IHCpos 0.241981 1.5033
## INFERRED_MENOPAUSAL_STATEpre 0.612309 2.6979
## BREAST_SURGERYMASTECTOMY 0.593086 1.4705
## BREAST_SURGERYnull 1.732392 15.5978
## CELLULARITYlow 1.062255 4.0238
## CELLULARITYmoderate 0.717886 2.0730
## CELLULARITYnull 0.103040 5.9593
## HER2_SNP6LOSS 0.299423 2.9232
## HER2_SNP6NEUT 0.463196 2.0470
## HER2_SNP6UNDEF NA NA
## THREEGENEER+/HER2- High Prolif 2.702484 752.3891
## THREEGENEER+/HER2- Low Prolif NA NA
## THREEGENEHER2+ 0.079528 6.1645
## THREEGENEnull 1.385932 3.8603
## CLAUDIN_SUBTYPEclaudin-low 0.433814 1.2560
## CLAUDIN_SUBTYPEHer2 0.262665 3.0390
## CLAUDIN_SUBTYPELumA 0.000000 Inf
## CLAUDIN_SUBTYPELumB 0.001803 0.4358
## CLAUDIN_SUBTYPENC NA NA
## CLAUDIN_SUBTYPENormal 0.000000 Inf
## CHEMOTHERAPYYES 0.735952 2.9838
## HORMONE_THERAPYYES 0.871999 2.2658
## RADIO_THERAPYYES 0.440056 1.3201
## HISTOLOGICAL_SUBTYPEDCIS NA NA
## HISTOLOGICAL_SUBTYPEIDC 0.000000 Inf
## HISTOLOGICAL_SUBTYPEIDC-MED 0.000000 Inf
## HISTOLOGICAL_SUBTYPEIDC-MUC NA NA
## HISTOLOGICAL_SUBTYPEIDC-TUB 0.000000 Inf
## HISTOLOGICAL_SUBTYPEIDC+ILC NA NA
## HISTOLOGICAL_SUBTYPEILC 0.000000 Inf
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR NA NA
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE NA NA
## HISTOLOGICAL_SUBTYPEnull NA NA
## HISTOLOGICAL_SUBTYPEOTHER NA NA
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE NA NA
## HISTOLOGICAL_SUBTYPEPHYL NA NA
##
## Concordance= 0.693 (se = 0.026 )
## Rsquare= 0.225 (max possible= 0.991 )
## Likelihood ratio test= 55.54 on 27 df, p=0.001
## Wald test = 47.91 on 27 df, p=0.008
## Score (logrank) test = 56.38 on 27 df, p=8e-04
Looking at INTCLUST10 patients, we see that a few more covariates are significant than for INTCLUST5: age at diagnosis, Breast surgery = null, cellularity = low, Threegene = null, Threegene = ER+/HER2- High Prolif, and Claudin subtype = LumB.
# Label each row of L10 as under or over 61 years at diagnosis, then plot
# the fitted survival curves for the two age groups.
L10AG <- mutate(
  L10,
  AG = ifelse(AGE_AT_DIAGNOSIS < 61, "Under61", "Over61")
)
autoplot(
  survfit(Surv(OS_MONTHS, OS_STATUS) ~ AG, data = L10AG)
)
Again we will split the ages by over/under 61, and for this we see that the curves are very similar to start, but start to diverge after t=100 to where those under61 have a better survival curve than those who are over 61.
# Survival curves in L10: THREEGENE recorded as "null" versus everything else.
autoplot(
  survfit(
    Surv(OS_MONTHS, OS_STATUS) ~ THREEGENE == "null",
    data = L10
  )
)
This shows that those with Threegene = null had a lower survival rate than the rest of the subjects with any other Threegene response. However, this group again has a fairly small sample, so more data would be needed before coming to this conclusion.
# Survival curves in L10: the ER+/HER2- High Prolif group versus all other
# THREEGENE responses, followed by one curve per THREEGENE value.
#
# The Cox summary for this subset reports the significant THREEGENE term as
# "ER+/HER2- High Prolif". The label compared against here before,
# "ER-/HER2- High Prolif", does not appear among the model's THREEGENE terms,
# so that comparison presumably matched no rows -- confirm against
# levels(L10$THREEGENE).
# NOTE(review): re-check the accompanying text against the regenerated plot.
autoplot(
  survfit(
    Surv(OS_MONTHS, OS_STATUS) ~ THREEGENE == "ER+/HER2- High Prolif",
    data = L10
  )
)
autoplot(
  survfit(Surv(OS_MONTHS, OS_STATUS) ~ THREEGENE, data = L10)
)
Looking at ER-/HER2- High Prolif vs. all of the Threegene responses, it seems like ER-/HER2- High Prolif has a slightly better survival rate at the beginning, but again with low volume it is difficult to make this conclusion.
# Survival curves in L10: BREAST_SURGERY recorded as "null" versus the rest.
autoplot(
  survfit(
    Surv(OS_MONTHS, OS_STATUS) ~ BREAST_SURGERY == "null",
    data = L10
  )
)
Again, with low volume no real conclusion can be drawn from this information. It would seem to suggest those who do not get breast surgery have a much lower survival rate, but that cannot be concluded from the information we have.
# Survival curves in L10: low cellularity versus every other cellularity value.
autoplot(
  survfit(
    Surv(OS_MONTHS, OS_STATUS) ~ CELLULARITY == "low",
    data = L10
  )
)
Another low volume variable, this would suggest that patients with low cellularity would have a lower survival curve, but with low volume that assumption would need further testing.
# Survival curves in L10: Claudin subtype LumB versus all other subtypes.
autoplot(
  survfit(
    Surv(OS_MONTHS, OS_STATUS) ~ CLAUDIN_SUBTYPE == "LumB",
    data = L10
  )
)
This would suggest that patients with INTCLUST 10 and Claudin subtype = LumB would have a higher survival curve than those who do not.
SUMMARY
Many of the INTCLUST 5 and INTCLUST 10 results suggest interesting ideas that could form the basis for further exploration, but because of the small sample sizes, real conclusions would be difficult to draw from many of the variables within these subsets.
# Load the gene-expression table and place its columns next to the clinical
# columns.
# NOTE(review): cbind() pairs rows by position, so this assumes `gene` is in
# the same patient order as `clinical` -- confirm.
gene <- readRDS("C:/Users/Dustin and Morgan/Desktop/gene_expression.rds")
geneclinical <- cbind(clinical, gene)

# Cox proportional-hazards fit of overall survival on the 17 candidate genes.
geneclincox <- coxph(
  Surv(OS_MONTHS, OS_STATUS) ~
    BRCA1 + BRCA2 + CDH1 + PTEN + STK11 + TP53 + ATM + BARD1 + CASP8 +
    CTLA4 + CYP19A1 + FGFR2 + LSP1 + MAP3K1 + NBN + RAD51 + TERT,
  data = geneclinical
)
summary(geneclincox)
## Call:
## coxph(formula = Surv(OS_MONTHS, OS_STATUS) ~ BRCA1 + BRCA2 +
## CDH1 + PTEN + STK11 + TP53 + ATM + BARD1 + CASP8 + CTLA4 +
## CYP19A1 + FGFR2 + LSP1 + MAP3K1 + NBN + RAD51 + TERT, data = geneclinical)
##
## n= 1904, number of events= 1103
##
## coef exp(coef) se(coef) z Pr(>|z|)
## BRCA1 -0.10850 0.89718 0.22330 -0.486 0.62705
## BRCA2 0.40503 1.49934 0.08722 4.644 3.42e-06 ***
## CDH1 0.15621 1.16908 0.21986 0.711 0.47739
## PTEN 0.27725 1.31950 0.05365 5.168 2.36e-07 ***
## STK11 0.19620 1.21677 0.08226 2.385 0.01708 *
## TP53 0.38638 1.47164 0.18901 2.044 0.04093 *
## ATM 0.01648 1.01662 0.09562 0.172 0.86313
## BARD1 -0.10585 0.89956 0.12528 -0.845 0.39817
## CASP8 -0.17005 0.84362 0.22217 -0.765 0.44403
## CTLA4 0.13552 1.14513 0.05507 2.461 0.01386 *
## CYP19A1 -0.07757 0.92536 0.04898 -1.584 0.11330
## FGFR2 0.01954 1.01973 0.03644 0.536 0.59187
## LSP1 -0.21961 0.80283 0.07875 -2.789 0.00529 **
## MAP3K1 0.05887 1.06064 0.09135 0.644 0.51931
## NBN 0.19444 1.21463 0.08128 2.392 0.01675 *
## RAD51 0.07100 1.07358 0.20395 0.348 0.72776
## TERT -0.21812 0.80403 0.06634 -3.288 0.00101 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## BRCA1 0.8972 1.1146 0.5792 1.3898
## BRCA2 1.4993 0.6670 1.2637 1.7789
## CDH1 1.1691 0.8554 0.7598 1.7988
## PTEN 1.3195 0.7579 1.1878 1.4658
## STK11 1.2168 0.8218 1.0356 1.4297
## TP53 1.4716 0.6795 1.0161 2.1315
## ATM 1.0166 0.9837 0.8429 1.2262
## BARD1 0.8996 1.1117 0.7037 1.1499
## CASP8 0.8436 1.1854 0.5458 1.3039
## CTLA4 1.1451 0.8733 1.0280 1.2757
## CYP19A1 0.9254 1.0807 0.8407 1.0186
## FGFR2 1.0197 0.9807 0.9494 1.0952
## LSP1 0.8028 1.2456 0.6880 0.9368
## MAP3K1 1.0606 0.9428 0.8868 1.2686
## NBN 1.2146 0.8233 1.0358 1.4244
## RAD51 1.0736 0.9315 0.7198 1.6012
## TERT 0.8040 1.2437 0.7060 0.9157
##
## Concordance= 0.599 (se = 0.009 )
## Rsquare= 0.045 (max possible= 1 )
## Likelihood ratio test= 88.22 on 17 df, p=1e-11
## Wald test = 89.98 on 17 df, p=6e-12
## Score (logrank) test = 90.1 on 17 df, p=6e-12
Using a Cox model based on a list of genes found to have an impact on breast cancer (source: https://www.nationalbreastcancer.org/other-breast-cancer-genes), our model says that the most significant genes are BRCA2, PTEN, STK11, TP53, CTLA4, LSP1, NBN, and TERT. ATM is not significant in this fit (p = 0.86), but it is still carried into the next model.
# Cox fit combining the genes kept from the gene-only model with the
# clinical covariates.
geneclincox2 <- coxph(
  Surv(OS_MONTHS, OS_STATUS) ~
    BRCA2 + PTEN + STK11 + TP53 + ATM + CTLA4 + LSP1 + NBN + TERT +
    INTCLUST + LATERALITY + NPI + INFERRED_MENOPAUSAL_STATE +
    BREAST_SURGERY + AGE_AT_DIAGNOSIS + CHEMOTHERAPY,
  data = geneclinical
)
summary(geneclincox2)
## Call:
## coxph(formula = Surv(OS_MONTHS, OS_STATUS) ~ BRCA2 + PTEN + STK11 +
## TP53 + ATM + CTLA4 + LSP1 + NBN + TERT + INTCLUST + LATERALITY +
## NPI + INFERRED_MENOPAUSAL_STATE + BREAST_SURGERY + AGE_AT_DIAGNOSIS +
## CHEMOTHERAPY, data = geneclinical)
##
## n= 1904, number of events= 1103
##
## coef exp(coef) se(coef) z Pr(>|z|)
## BRCA2 0.112928 1.119551 0.092206 1.225 0.220675
## PTEN 0.239694 1.270861 0.066874 3.584 0.000338
## STK11 0.120160 1.127677 0.080362 1.495 0.134853
## TP53 0.415365 1.514923 0.188029 2.209 0.027171
## ATM 0.170071 1.185389 0.097170 1.750 0.080075
## CTLA4 0.153601 1.166026 0.055884 2.749 0.005986
## LSP1 -0.261818 0.769651 0.080128 -3.267 0.001085
## NBN 0.203465 1.225642 0.082080 2.479 0.013180
## TERT -0.203051 0.816236 0.067096 -3.026 0.002476
## INTCLUST10 -0.613731 0.541327 0.175168 -3.504 0.000459
## INTCLUST2 0.102642 1.108094 0.188927 0.543 0.586931
## INTCLUST3 -0.247156 0.781019 0.158267 -1.562 0.118374
## INTCLUST4ER- -0.258340 0.772332 0.210216 -1.229 0.219098
## INTCLUST4ER+ -0.204196 0.815303 0.167388 -1.220 0.222504
## INTCLUST5 0.068813 1.071236 0.158224 0.435 0.663630
## INTCLUST6 -0.083801 0.919614 0.180701 -0.464 0.642822
## INTCLUST7 -0.259875 0.771148 0.164251 -1.582 0.113607
## INTCLUST8 -0.131000 0.877218 0.150574 -0.870 0.384300
## INTCLUST9 -0.105584 0.899799 0.162356 -0.650 0.515483
## LATERALITYnull 0.639837 1.896171 0.128277 4.988 6.10e-07
## LATERALITYr -0.058580 0.943103 0.063723 -0.919 0.357942
## NPI 0.206454 1.229312 0.031896 6.473 9.62e-11
## INFERRED_MENOPAUSAL_STATEpre 0.510982 1.666928 0.120274 4.248 2.15e-05
## BREAST_SURGERYMASTECTOMY 0.299936 1.349773 0.066680 4.498 6.85e-06
## BREAST_SURGERYnull 0.512199 1.668957 0.295466 1.734 0.083002
## AGE_AT_DIAGNOSIS 0.052767 1.054184 0.003881 13.598 < 2e-16
## CHEMOTHERAPYYES 0.402654 1.495789 0.100704 3.998 6.38e-05
##
## BRCA2
## PTEN ***
## STK11
## TP53 *
## ATM .
## CTLA4 **
## LSP1 **
## NBN *
## TERT **
## INTCLUST10 ***
## INTCLUST2
## INTCLUST3
## INTCLUST4ER-
## INTCLUST4ER+
## INTCLUST5
## INTCLUST6
## INTCLUST7
## INTCLUST8
## INTCLUST9
## LATERALITYnull ***
## LATERALITYr
## NPI ***
## INFERRED_MENOPAUSAL_STATEpre ***
## BREAST_SURGERYMASTECTOMY ***
## BREAST_SURGERYnull .
## AGE_AT_DIAGNOSIS ***
## CHEMOTHERAPYYES ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## BRCA2 1.1196 0.8932 0.9345 1.3413
## PTEN 1.2709 0.7869 1.1147 1.4488
## STK11 1.1277 0.8868 0.9633 1.3200
## TP53 1.5149 0.6601 1.0479 2.1900
## ATM 1.1854 0.8436 0.9798 1.4341
## CTLA4 1.1660 0.8576 1.0451 1.3010
## LSP1 0.7697 1.2993 0.6578 0.9005
## NBN 1.2256 0.8159 1.0435 1.4396
## TERT 0.8162 1.2251 0.7157 0.9310
## INTCLUST10 0.5413 1.8473 0.3840 0.7631
## INTCLUST2 1.1081 0.9025 0.7652 1.6047
## INTCLUST3 0.7810 1.2804 0.5727 1.0651
## INTCLUST4ER- 0.7723 1.2948 0.5115 1.1661
## INTCLUST4ER+ 0.8153 1.2265 0.5873 1.1319
## INTCLUST5 1.0712 0.9335 0.7856 1.4607
## INTCLUST6 0.9196 1.0874 0.6453 1.3104
## INTCLUST7 0.7711 1.2968 0.5589 1.0640
## INTCLUST8 0.8772 1.1400 0.6530 1.1784
## INTCLUST9 0.8998 1.1114 0.6546 1.2369
## LATERALITYnull 1.8962 0.5274 1.4746 2.4382
## LATERALITYr 0.9431 1.0603 0.8324 1.0686
## NPI 1.2293 0.8135 1.1548 1.3086
## INFERRED_MENOPAUSAL_STATEpre 1.6669 0.5999 1.3169 2.1101
## BREAST_SURGERYMASTECTOMY 1.3498 0.7409 1.1844 1.5382
## BREAST_SURGERYnull 1.6690 0.5992 0.9353 2.9781
## AGE_AT_DIAGNOSIS 1.0542 0.9486 1.0462 1.0622
## CHEMOTHERAPYYES 1.4958 0.6685 1.2279 1.8222
##
## Concordance= 0.684 (se = 0.009 )
## Rsquare= 0.225 (max possible= 1 )
## Likelihood ratio test= 484.6 on 27 df, p=<2e-16
## Wald test = 476.2 on 27 df, p=<2e-16
## Score (logrank) test = 493.3 on 27 df, p=<2e-16
This model shows a number of significant covariates, including genes such as PTEN, TP53, CTLA4, LSP1, NBN, TERT along with INTCLUST10, LATERALITY = null, NPI, Inferred menopausal state = pre menopausal, having a mastectomy, age at diagnosis, and chemotherapy yes. Another model will be fit with this and we will check with AIC to see which has the better fit.
# Reduced Cox fit: same clinical covariates as geneclincox2, but without
# BRCA2, STK11 and ATM. The two fits are then compared by AIC.
geneclincox3 <- coxph(
  Surv(OS_MONTHS, OS_STATUS) ~
    PTEN + TP53 + CTLA4 + LSP1 + NBN + TERT +
    INTCLUST + LATERALITY + NPI + INFERRED_MENOPAUSAL_STATE +
    BREAST_SURGERY + AGE_AT_DIAGNOSIS + CHEMOTHERAPY,
  data = geneclinical
)
AIC(geneclincox2, geneclincox3)
## df AIC
## geneclincox2 27 14647.34
## geneclincox3 24 14648.57
Interestingly, AIC shows that the model with all our covariates and genes is slightly better at explaining our information than the model restricted to significant covariates, although the difference (about 1.2 AIC units) is small. This would suggest that arbitrarily removing variables from our model is not a good thing. We can compare both to our original model as well.
# AIC for the gene-only fit alongside the two gene + clinical fits.
AIC(
  geneclincox,
  geneclincox2,
  geneclincox3
)
## df AIC
## geneclincox 17 15023.72
## geneclincox2 27 14647.34
## geneclincox3 24 14648.57
This shows that both of our models combining the gene data set with the clinical data set are better at explaining our information than the one that does not include the clinical information. This seems to suggest that a combination of physical issues as well as genetic issues are needed to explain survival of breast cancer patients, which clinically could be useful. This would help to explain to researchers that only focusing on genetic or physical characteristics would not get the whole story when investigating this topic.
When our gene data set was added to our clinical data set, we saw that intclust 10 was the only intclust that showed to be significant. With this we can now look at the genes within patients limited to intclust 10 and see which may have an impact.
# Keep only the INTCLUST 10 rows, then fit the 17 genes together with the
# clinical covariates on that subset.
G10 <- subset(geneclinical, INTCLUST == 10)
genecoxclinicalL10 <- coxph(
  Surv(OS_MONTHS, OS_STATUS) ~
    BRCA1 + BRCA2 + CDH1 + PTEN + STK11 + TP53 + ATM + BARD1 + CASP8 +
    CTLA4 + CYP19A1 + FGFR2 + LSP1 + MAP3K1 + NBN + RAD51 + TERT +
    LATERALITY + NPI + INFERRED_MENOPAUSAL_STATE + BREAST_SURGERY +
    AGE_AT_DIAGNOSIS + CHEMOTHERAPY,
  data = G10
)
summary(genecoxclinicalL10)
## Call:
## coxph(formula = Surv(OS_MONTHS, OS_STATUS) ~ BRCA1 + BRCA2 +
## CDH1 + PTEN + STK11 + TP53 + ATM + BARD1 + CASP8 + CTLA4 +
## CYP19A1 + FGFR2 + LSP1 + MAP3K1 + NBN + RAD51 + TERT + LATERALITY +
## NPI + INFERRED_MENOPAUSAL_STATE + BREAST_SURGERY + AGE_AT_DIAGNOSIS +
## CHEMOTHERAPY, data = G10)
##
## n= 219, number of events= 104
##
## coef exp(coef) se(coef) z Pr(>|z|)
## BRCA1 -0.028281 0.972115 0.846902 -0.033 0.97336
## BRCA2 0.294597 1.342585 0.294755 0.999 0.31757
## CDH1 0.479831 1.615801 0.815985 0.588 0.55651
## PTEN 0.144698 1.155691 0.198833 0.728 0.46677
## STK11 0.346679 1.414362 0.256884 1.350 0.17716
## TP53 0.025830 1.026167 0.584406 0.044 0.96475
## ATM 0.105604 1.111381 0.258763 0.408 0.68319
## BARD1 -0.037027 0.963650 0.363144 -0.102 0.91879
## CASP8 -0.730440 0.481697 0.753662 -0.969 0.33245
## CTLA4 -0.032703 0.967825 0.157262 -0.208 0.83526
## CYP19A1 -0.001335 0.998666 0.152202 -0.009 0.99300
## FGFR2 -0.074534 0.928176 0.129162 -0.577 0.56390
## LSP1 -0.251084 0.777957 0.244325 -1.028 0.30411
## MAP3K1 -0.492720 0.610962 0.386783 -1.274 0.20270
## NBN 0.257665 1.293905 0.258267 0.998 0.31844
## RAD51 -0.197629 0.820674 0.584996 -0.338 0.73549
## TERT -0.246571 0.781476 0.260270 -0.947 0.34345
## LATERALITYnull 1.029540 2.799778 0.394833 2.608 0.00912
## LATERALITYr 0.128532 1.137157 0.227770 0.564 0.57255
## NPI 0.216408 1.241609 0.185431 1.167 0.24319
## INFERRED_MENOPAUSAL_STATEpre -0.008909 0.991131 0.378064 -0.024 0.98120
## BREAST_SURGERYMASTECTOMY 0.196144 1.216702 0.224739 0.873 0.38279
## BREAST_SURGERYnull 1.032426 2.807868 0.655793 1.574 0.11541
## AGE_AT_DIAGNOSIS 0.024687 1.024994 0.015290 1.615 0.10639
## CHEMOTHERAPYYES 0.340553 1.405725 0.301912 1.128 0.25932
##
## BRCA1
## BRCA2
## CDH1
## PTEN
## STK11
## TP53
## ATM
## BARD1
## CASP8
## CTLA4
## CYP19A1
## FGFR2
## LSP1
## MAP3K1
## NBN
## RAD51
## TERT
## LATERALITYnull **
## LATERALITYr
## NPI
## INFERRED_MENOPAUSAL_STATEpre
## BREAST_SURGERYMASTECTOMY
## BREAST_SURGERYnull
## AGE_AT_DIAGNOSIS
## CHEMOTHERAPYYES
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## BRCA1 0.9721 1.0287 0.1849 5.112
## BRCA2 1.3426 0.7448 0.7534 2.392
## CDH1 1.6158 0.6189 0.3265 7.998
## PTEN 1.1557 0.8653 0.7827 1.706
## STK11 1.4144 0.7070 0.8549 2.340
## TP53 1.0262 0.9745 0.3264 3.226
## ATM 1.1114 0.8998 0.6693 1.846
## BARD1 0.9637 1.0377 0.4729 1.963
## CASP8 0.4817 2.0760 0.1100 2.110
## CTLA4 0.9678 1.0332 0.7111 1.317
## CYP19A1 0.9987 1.0013 0.7411 1.346
## FGFR2 0.9282 1.0774 0.7206 1.196
## LSP1 0.7780 1.2854 0.4819 1.256
## MAP3K1 0.6110 1.6368 0.2863 1.304
## NBN 1.2939 0.7729 0.7799 2.147
## RAD51 0.8207 1.2185 0.2607 2.583
## TERT 0.7815 1.2796 0.4692 1.302
## LATERALITYnull 2.7998 0.3572 1.2913 6.070
## LATERALITYr 1.1372 0.8794 0.7277 1.777
## NPI 1.2416 0.8054 0.8633 1.786
## INFERRED_MENOPAUSAL_STATEpre 0.9911 1.0089 0.4724 2.079
## BREAST_SURGERYMASTECTOMY 1.2167 0.8219 0.7832 1.890
## BREAST_SURGERYnull 2.8079 0.3561 0.7765 10.153
## AGE_AT_DIAGNOSIS 1.0250 0.9756 0.9947 1.056
## CHEMOTHERAPYYES 1.4057 0.7114 0.7779 2.540
##
## Concordance= 0.662 (se = 0.027 )
## Rsquare= 0.163 (max possible= 0.99 )
## Likelihood ratio test= 38.85 on 25 df, p=0.04
## Wald test = 43.8 on 25 df, p=0.01
## Score (logrank) test = 50.32 on 25 df, p=0.002
Oddly enough, this shows no genes are significant, and laterality = null is the only significant variable. I will attempt this again, removing the clinical data set covariates and only looking at genes.
# Gene-only Cox fit on the G10 subset (clinical covariates left out).
genecoxclinicalL102 <- coxph(
  Surv(OS_MONTHS, OS_STATUS) ~
    BRCA1 + BRCA2 + CDH1 + PTEN + STK11 + TP53 + ATM + BARD1 + CASP8 +
    CTLA4 + CYP19A1 + FGFR2 + LSP1 + MAP3K1 + NBN + RAD51 + TERT,
  data = G10
)
summary(genecoxclinicalL102)
## Call:
## coxph(formula = Surv(OS_MONTHS, OS_STATUS) ~ BRCA1 + BRCA2 +
## CDH1 + PTEN + STK11 + TP53 + ATM + BARD1 + CASP8 + CTLA4 +
## CYP19A1 + FGFR2 + LSP1 + MAP3K1 + NBN + RAD51 + TERT, data = G10)
##
## n= 219, number of events= 104
##
## coef exp(coef) se(coef) z Pr(>|z|)
## BRCA1 0.08984 1.09400 0.82256 0.109 0.9130
## BRCA2 0.41490 1.51421 0.27534 1.507 0.1318
## CDH1 0.05918 1.06096 0.79621 0.074 0.9408
## PTEN 0.14252 1.15318 0.19215 0.742 0.4583
## STK11 0.43541 1.54560 0.24991 1.742 0.0815 .
## TP53 0.31930 1.37616 0.57077 0.559 0.5759
## ATM 0.11560 1.12254 0.23840 0.485 0.6278
## BARD1 -0.40478 0.66713 0.34754 -1.165 0.2441
## CASP8 -0.51568 0.59709 0.73941 -0.697 0.4855
## CTLA4 -0.04635 0.95471 0.15271 -0.304 0.7615
## CYP19A1 -0.13210 0.87626 0.14847 -0.890 0.3736
## FGFR2 -0.03510 0.96551 0.12826 -0.274 0.7843
## LSP1 -0.13136 0.87690 0.23994 -0.547 0.5841
## MAP3K1 -0.56580 0.56790 0.36443 -1.553 0.1205
## NBN 0.32584 1.38520 0.24493 1.330 0.1834
## RAD51 0.06416 1.06627 0.55446 0.116 0.9079
## TERT -0.42513 0.65369 0.25350 -1.677 0.0935 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## BRCA1 1.0940 0.9141 0.2182 5.485
## BRCA2 1.5142 0.6604 0.8827 2.598
## CDH1 1.0610 0.9425 0.2228 5.052
## PTEN 1.1532 0.8672 0.7913 1.681
## STK11 1.5456 0.6470 0.9471 2.522
## TP53 1.3762 0.7267 0.4496 4.212
## ATM 1.1225 0.8908 0.7035 1.791
## BARD1 0.6671 1.4990 0.3376 1.318
## CASP8 0.5971 1.6748 0.1402 2.543
## CTLA4 0.9547 1.0474 0.7078 1.288
## CYP19A1 0.8763 1.1412 0.6550 1.172
## FGFR2 0.9655 1.0357 0.7509 1.241
## LSP1 0.8769 1.1404 0.5479 1.403
## MAP3K1 0.5679 1.7609 0.2780 1.160
## NBN 1.3852 0.7219 0.8571 2.239
## RAD51 1.0663 0.9379 0.3597 3.161
## TERT 0.6537 1.5298 0.3977 1.074
##
## Concordance= 0.607 (se = 0.031 )
## Rsquare= 0.073 (max possible= 0.99 )
## Likelihood ratio test= 16.58 on 17 df, p=0.5
## Wald test = 17.26 on 17 df, p=0.4
## Score (logrank) test = 16.99 on 17 df, p=0.5
This again shows that no genes are significant in survival rates with patients with intclust 10. Taking from what we saw before, maybe there is an area for exploration as to why intclust 10 seems to have a higher survival curve than most other intclust patients, and why the genes associated with breast cancer seem to not play a significant role with these patients.
# Keep every patient whose INTCLUST is not 10.
#
# The earlier expression `INTCLUST =! 10` parsed as the named argument
# `INTCLUST = !10`, so subset() received no filter condition and returned
# every row (the printed fit reports n = 1904, the size of the full data).
# `!=` is the inequality operator.
# NOTE(review): the output and conclusions printed after this chunk were
# produced from the unfiltered data; re-knit to refresh them.
Gn10 <- subset(geneclinical, INTCLUST != 10)

# Genes plus clinical covariates, fitted on the non-INTCLUST-10 patients.
genecoxclinicalnL10 <- coxph(
  Surv(OS_MONTHS, OS_STATUS) ~
    BRCA1 + BRCA2 + CDH1 + PTEN + STK11 + TP53 + ATM + BARD1 + CASP8 +
    CTLA4 + CYP19A1 + FGFR2 + LSP1 + MAP3K1 + NBN + RAD51 + TERT +
    LATERALITY + NPI + INFERRED_MENOPAUSAL_STATE + BREAST_SURGERY +
    AGE_AT_DIAGNOSIS + CHEMOTHERAPY,
  data = Gn10
)
summary(genecoxclinicalnL10)
## Call:
## coxph(formula = Surv(OS_MONTHS, OS_STATUS) ~ BRCA1 + BRCA2 +
## CDH1 + PTEN + STK11 + TP53 + ATM + BARD1 + CASP8 + CTLA4 +
## CYP19A1 + FGFR2 + LSP1 + MAP3K1 + NBN + RAD51 + TERT + LATERALITY +
## NPI + INFERRED_MENOPAUSAL_STATE + BREAST_SURGERY + AGE_AT_DIAGNOSIS +
## CHEMOTHERAPY, data = Gn10)
##
## n= 1904, number of events= 1103
##
## coef exp(coef) se(coef) z Pr(>|z|)
## BRCA1 -0.091805 0.912283 0.223277 -0.411 0.680947
## BRCA2 0.200737 1.222303 0.088790 2.261 0.023772
## CDH1 0.189967 1.209209 0.217097 0.875 0.381558
## PTEN 0.188517 1.207458 0.056694 3.325 0.000884
## STK11 0.191844 1.211482 0.082760 2.318 0.020445
## TP53 0.333706 1.396133 0.188175 1.773 0.076165
## ATM 0.110259 1.116567 0.093972 1.173 0.240667
## BARD1 -0.156496 0.855135 0.124690 -1.255 0.209449
## CASP8 -0.248624 0.779873 0.225689 -1.102 0.270625
## CTLA4 0.133297 1.142589 0.054824 2.431 0.015042
## CYP19A1 -0.033263 0.967284 0.048323 -0.688 0.491230
## FGFR2 0.061420 1.063345 0.036370 1.689 0.091263
## LSP1 -0.281922 0.754333 0.079109 -3.564 0.000366
## MAP3K1 -0.017227 0.982921 0.088798 -0.194 0.846177
## NBN 0.189548 1.208703 0.081095 2.337 0.019420
## RAD51 -0.053060 0.948323 0.202165 -0.262 0.792969
## TERT -0.150728 0.860082 0.066923 -2.252 0.024305
## LATERALITYnull 0.603814 1.829081 0.129596 4.659 3.17e-06
## LATERALITYr -0.074757 0.927969 0.063113 -1.185 0.236213
## NPI 0.226434 1.254119 0.031596 7.167 7.69e-13
## INFERRED_MENOPAUSAL_STATEpre 0.460658 1.585117 0.120118 3.835 0.000126
## BREAST_SURGERYMASTECTOMY 0.310213 1.363715 0.066185 4.687 2.77e-06
## BREAST_SURGERYnull 0.660326 1.935423 0.293005 2.254 0.024219
## AGE_AT_DIAGNOSIS 0.052394 1.053791 0.003879 13.509 < 2e-16
## CHEMOTHERAPYYES 0.386462 1.471765 0.099187 3.896 9.77e-05
##
## BRCA1
## BRCA2 *
## CDH1
## PTEN ***
## STK11 *
## TP53 .
## ATM
## BARD1
## CASP8
## CTLA4 *
## CYP19A1
## FGFR2 .
## LSP1 ***
## MAP3K1
## NBN *
## RAD51
## TERT *
## LATERALITYnull ***
## LATERALITYr
## NPI ***
## INFERRED_MENOPAUSAL_STATEpre ***
## BREAST_SURGERYMASTECTOMY ***
## BREAST_SURGERYnull *
## AGE_AT_DIAGNOSIS ***
## CHEMOTHERAPYYES ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## BRCA1 0.9123 1.0962 0.5889 1.4131
## BRCA2 1.2223 0.8181 1.0271 1.4546
## CDH1 1.2092 0.8270 0.7901 1.8505
## PTEN 1.2075 0.8282 1.0805 1.3494
## STK11 1.2115 0.8254 1.0301 1.4248
## TP53 1.3961 0.7163 0.9655 2.0188
## ATM 1.1166 0.8956 0.9287 1.3424
## BARD1 0.8551 1.1694 0.6697 1.0919
## CASP8 0.7799 1.2823 0.5011 1.2138
## CTLA4 1.1426 0.8752 1.0262 1.2722
## CYP19A1 0.9673 1.0338 0.8799 1.0634
## FGFR2 1.0633 0.9404 0.9902 1.1419
## LSP1 0.7543 1.3257 0.6460 0.8808
## MAP3K1 0.9829 1.0174 0.8259 1.1698
## NBN 1.2087 0.8273 1.0311 1.4169
## RAD51 0.9483 1.0545 0.6381 1.4094
## TERT 0.8601 1.1627 0.7544 0.9806
## LATERALITYnull 1.8291 0.5467 1.4188 2.3580
## LATERALITYr 0.9280 1.0776 0.8200 1.0502
## NPI 1.2541 0.7974 1.1788 1.3342
## INFERRED_MENOPAUSAL_STATEpre 1.5851 0.6309 1.2526 2.0059
## BREAST_SURGERYMASTECTOMY 1.3637 0.7333 1.1978 1.5526
## BREAST_SURGERYnull 1.9354 0.5167 1.0899 3.4370
## AGE_AT_DIAGNOSIS 1.0538 0.9490 1.0458 1.0618
## CHEMOTHERAPYYES 1.4718 0.6795 1.2117 1.7876
##
## Concordance= 0.678 (se = 0.008 )
## Rsquare= 0.215 (max possible= 1 )
## Likelihood ratio test= 460 on 25 df, p=<2e-16
## Wald test = 450.5 on 25 df, p=<2e-16
## Score (logrank) test = 466.7 on 25 df, p=<2e-16
Opposite of the INTCLUST 10 subset, for all other patients who do not have INTCLUST 10, we see a number of significant genes including BRCA2, PTEN, STK11, CTLA4, LSP1, NBN, and TERT. This could be useful information to test to see why INTCLUST 10 does not seem to have their survival rates affected by certain genes, but the others do. Possibly this could be used as a future area of study for these types of patients.