knitr::opts_chunk$set(echo = TRUE)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.3
library(survival)
library(survminer)
## Warning: package 'survminer' was built under R version 3.5.3
## Loading required package: ggpubr
## Warning: package 'ggpubr' was built under R version 3.5.3
## Loading required package: magrittr
library(ggfortify)
## Warning: package 'ggfortify' was built under R version 3.5.3

Introduction

This is a survival analysis done on a breast cancer dataset.

The chunk below loads the clinical data set, recodes the overall-survival status (OS_STATUS) as a numeric event indicator, and prints a summary of every variable:

# Load the clinical data set.
# NOTE(review): this is an absolute, machine-specific path; point it at your
# own copy of clinical.rds before running.
clinical <- readRDS("C:/Users/Dustin and Morgan/Desktop/clinical.rds")

# Recode overall-survival status as the numeric event indicator that Surv()
# expects: 1 = DECEASED (event), 0 = LIVING (censored).
# The labels are mapped directly via a named lookup instead of first calling
# as.numeric() on the raw column, which made the label comparisons no-ops and
# only yielded 0/1 through the column's internal integer codes (presumably
# factor level order). Any label other than these two becomes NA rather than
# being silently kept.
clinical$OS_STATUS <- unname(
  c(LIVING = 0, DECEASED = 1)[as.character(clinical$OS_STATUS)]
)

# Overview of every variable in the data set.
summary(clinical)
##   PATIENT_ID          OS_MONTHS        OS_STATUS     
##  Length:1904        Min.   :  0.00   Min.   :0.0000  
##  Class :character   1st Qu.: 60.83   1st Qu.:0.0000  
##  Mode  :character   Median :114.90   Median :1.0000  
##                     Mean   :125.03   Mean   :0.5793  
##                     3rd Qu.:184.47   3rd Qu.:1.0000  
##                     Max.   :355.20   Max.   :1.0000  
##                                                      
##                VITAL_STATUS    INTCLUST       COHORT      AGE_AT_DIAGNOSIS
##  Died of Disease     :622   8      :289   Min.   :1.000   Min.   :21.93   
##  Died of Other Causes:480   3      :282   1st Qu.:1.000   1st Qu.:51.38   
##  Living              :801   4ER+   :244   Median :3.000   Median :61.77   
##  NA's                :  1   10     :219   Mean   :2.644   Mean   :61.09   
##                             5      :184   3rd Qu.:3.000   3rd Qu.:70.59   
##                             7      :182   Max.   :5.000   Max.   :96.29   
##                             (Other):504                                   
##  LATERALITY      NPI         ER_IHC     INFERRED_MENOPAUSAL_STATE
##  l   :935   Min.   :1.000   neg : 429   post:1493                
##  null:106   1st Qu.:3.046   pos :1445   pre : 411                
##  r   :863   Median :4.042   NA's:  30                            
##             Mean   :4.033                                        
##             3rd Qu.:5.040                                        
##             Max.   :6.360                                        
##                                                                  
##            BREAST_SURGERY   CELLULARITY  HER2_SNP6   
##  BREAST CONSERVING: 755   high    :939   GAIN : 417  
##  MASTECTOMY       :1127   low     :200   LOSS : 100  
##  null             :  22   moderate:711   NEUT :1383  
##                           null    : 54   UNDEF:   4  
##                                                      
##                                                      
##                                                      
##                  THREEGENE      CLAUDIN_SUBTYPE CHEMOTHERAPY
##  ER-/HER2-            :290   Basal      :199    NO :1508    
##  ER+/HER2- High Prolif:603   claudin-low:199    YES: 396    
##  ER+/HER2- Low Prolif :619   Her2       :220                
##  HER2+                :188   LumA       :679                
##  null                 :204   LumB       :461                
##                              NC         :  6                
##                              Normal     :140                
##  HORMONE_THERAPY RADIO_THERAPY HISTOLOGICAL_SUBTYPE
##  NO : 730        NO : 767      IDC    :1500        
##  YES:1174        YES:1137      ILC    : 141        
##                                IDC+ILC:  87        
##                                IDC-TUB:  67        
##                                IDC-MUC:  42        
##                                IDC-MED:  31        
##                                (Other):  36

This shows the variables in the data set, which include the timeframe in months (OS_MONTHS) and the status of the patient (OS_STATUS), where 0 is living and 1 is deceased. Other variables are INTCLUST (integrative cluster), cohort, age at diagnosis, laterality (indicating in which breast the cancer was found), NPI (Nottingham Prognostic Index), ER IHC (where positive indicates that the cancer cells grow in response to estrogen), inferred menopausal state, the type of breast surgery performed, cellularity (% of tumor volume occupied by invasive tumor cells), HER2_SNP6 (human epidermal growth factor receptor 2), THREEGENE, Claudin subtype, chemotherapy, hormone therapy, radio therapy, and histological subtype.

Cox Proportional Hazards

A Cox proportional hazards model was used to see which of these variables are significant for survival in breast cancer.

# Fit a Cox proportional hazards model of overall survival (time = OS_MONTHS,
# event = OS_STATUS) on every clinical covariate, then print the coefficient
# table, hazard ratios and confidence intervals.
coxclinical <- coxph(
  Surv(OS_MONTHS, OS_STATUS) ~
    INTCLUST + COHORT + AGE_AT_DIAGNOSIS + LATERALITY + NPI + ER_IHC +
    INFERRED_MENOPAUSAL_STATE + BREAST_SURGERY + CELLULARITY + HER2_SNP6 +
    THREEGENE + CLAUDIN_SUBTYPE + CHEMOTHERAPY + HORMONE_THERAPY +
    RADIO_THERAPY + HISTOLOGICAL_SUBTYPE,
  data = clinical
)
## Warning in fitter(X, Y, strats, offset, init, control, weights = weights, :
## Loglik converged before variable 39 ; beta may be infinite.
summary(coxclinical)
## Call:
## coxph(formula = Surv(OS_MONTHS, OS_STATUS) ~ INTCLUST + COHORT + 
##     AGE_AT_DIAGNOSIS + LATERALITY + NPI + ER_IHC + INFERRED_MENOPAUSAL_STATE + 
##     BREAST_SURGERY + CELLULARITY + HER2_SNP6 + THREEGENE + CLAUDIN_SUBTYPE + 
##     CHEMOTHERAPY + HORMONE_THERAPY + RADIO_THERAPY + HISTOLOGICAL_SUBTYPE, 
##     data = clinical)
## 
##   n= 1874, number of events= 1089 
##    (30 observations deleted due to missingness)
## 
##                                                        coef  exp(coef)
## INTCLUST10                                        -0.400162   0.670211
## INTCLUST2                                          0.092095   1.096469
## INTCLUST3                                         -0.174030   0.840272
## INTCLUST4ER-                                      -0.231097   0.793663
## INTCLUST4ER+                                      -0.128344   0.879551
## INTCLUST5                                          0.551684   1.736175
## INTCLUST6                                          0.017199   1.017348
## INTCLUST7                                         -0.194490   0.823255
## INTCLUST8                                         -0.082124   0.921158
## INTCLUST9                                          0.058605   1.060356
## COHORT                                             0.036019   1.036676
## AGE_AT_DIAGNOSIS                                   0.053083   1.054517
## LATERALITYnull                                     0.640261   1.896976
## LATERALITYr                                       -0.102772   0.902333
## NPI                                                0.232808   1.262139
## ER_IHCpos                                         -0.226524   0.797301
## INFERRED_MENOPAUSAL_STATEpre                       0.514892   1.673458
## BREAST_SURGERYMASTECTOMY                           0.249966   1.283982
## BREAST_SURGERYnull                                 0.401878   1.494629
## CELLULARITYlow                                     0.110008   1.116287
## CELLULARITYmoderate                                0.037290   1.037994
## CELLULARITYnull                                    0.011039   1.011100
## HER2_SNP6LOSS                                      0.062863   1.064881
## HER2_SNP6NEUT                                      0.015280   1.015397
## HER2_SNP6UNDEF                                    -0.196452   0.821640
## THREEGENEER+/HER2- High Prolif                     0.018773   1.018951
## THREEGENEER+/HER2- Low Prolif                     -0.043477   0.957455
## THREEGENEHER2+                                    -0.408924   0.664365
## THREEGENEnull                                     -0.001178   0.998822
## CLAUDIN_SUBTYPEclaudin-low                        -0.195379   0.822523
## CLAUDIN_SUBTYPEHer2                               -0.049823   0.951397
## CLAUDIN_SUBTYPELumA                               -0.156718   0.854945
## CLAUDIN_SUBTYPELumB                               -0.013972   0.986125
## CLAUDIN_SUBTYPENC                                  0.123037   1.130926
## CLAUDIN_SUBTYPENormal                              0.063251   1.065295
## CHEMOTHERAPYYES                                    0.393407   1.482022
## HORMONE_THERAPYYES                                -0.057645   0.943985
## RADIO_THERAPYYES                                  -0.080586   0.922576
## HISTOLOGICAL_SUBTYPEDCIS                          -6.749872   0.001171
## HISTOLOGICAL_SUBTYPEIDC                            0.120307   1.127843
## HISTOLOGICAL_SUBTYPEIDC-MED                       -0.328760   0.719816
## HISTOLOGICAL_SUBTYPEIDC-MUC                        0.188385   1.207298
## HISTOLOGICAL_SUBTYPEIDC-TUB                       -0.117070   0.889523
## HISTOLOGICAL_SUBTYPEIDC+ILC                        0.261433   1.298790
## HISTOLOGICAL_SUBTYPEILC                            0.381559   1.464566
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR               -0.587939   0.555471
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE  -1.105652   0.330995
## HISTOLOGICAL_SUBTYPEnull                          -0.031327   0.969159
## HISTOLOGICAL_SUBTYPEOTHER                         -0.427040   0.652437
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE                       NA         NA
## HISTOLOGICAL_SUBTYPEPHYL                                 NA         NA
##                                                    se(coef)      z
## INTCLUST10                                         0.192484 -2.079
## INTCLUST2                                          0.190409  0.484
## INTCLUST3                                          0.160485 -1.084
## INTCLUST4ER-                                       0.229561 -1.007
## INTCLUST4ER+                                       0.166020 -0.773
## INTCLUST5                                          0.211542  2.608
## INTCLUST6                                          0.179034  0.096
## INTCLUST7                                          0.164340 -1.183
## INTCLUST8                                          0.152720 -0.538
## INTCLUST9                                          0.161633  0.363
## COHORT                                             0.028394  1.269
## AGE_AT_DIAGNOSIS                                   0.003975 13.353
## LATERALITYnull                                     0.133339  4.802
## LATERALITYr                                        0.064413 -1.596
## NPI                                                0.035971  6.472
## ER_IHCpos                                          0.139413 -1.625
## INFERRED_MENOPAUSAL_STATEpre                       0.122973  4.187
## BREAST_SURGERYMASTECTOMY                           0.081581  3.064
## BREAST_SURGERYnull                                 0.314965  1.276
## CELLULARITYlow                                     0.115241  0.955
## CELLULARITYmoderate                                0.069232  0.539
## CELLULARITYnull                                    0.210071  0.053
## HER2_SNP6LOSS                                      0.171567  0.366
## HER2_SNP6NEUT                                      0.104886  0.146
## HER2_SNP6UNDEF                                     0.594983 -0.330
## THREEGENEER+/HER2- High Prolif                     0.160394  0.117
## THREEGENEER+/HER2- Low Prolif                      0.163353 -0.266
## THREEGENEHER2+                                     0.217989 -1.876
## THREEGENEnull                                      0.153684 -0.008
## CLAUDIN_SUBTYPEclaudin-low                         0.159251 -1.227
## CLAUDIN_SUBTYPEHer2                                0.160328 -0.311
## CLAUDIN_SUBTYPELumA                                0.171453 -0.914
## CLAUDIN_SUBTYPELumB                                0.171688 -0.081
## CLAUDIN_SUBTYPENC                                  0.480743  0.256
## CLAUDIN_SUBTYPENormal                              0.191390  0.330
## CHEMOTHERAPYYES                                    0.113076  3.479
## HORMONE_THERAPYYES                                 0.077550 -0.743
## RADIO_THERAPYYES                                   0.082288 -0.979
## HISTOLOGICAL_SUBTYPEDCIS                         718.427982 -0.009
## HISTOLOGICAL_SUBTYPEIDC                            0.583952  0.206
## HISTOLOGICAL_SUBTYPEIDC-MED                        0.647502 -0.508
## HISTOLOGICAL_SUBTYPEIDC-MUC                        0.626245  0.301
## HISTOLOGICAL_SUBTYPEIDC-TUB                        0.615707 -0.190
## HISTOLOGICAL_SUBTYPEIDC+ILC                        0.600519  0.435
## HISTOLOGICAL_SUBTYPEILC                            0.591833  0.645
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR                0.774588 -0.759
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE   1.162556 -0.951
## HISTOLOGICAL_SUBTYPEnull                           1.167941 -0.027
## HISTOLOGICAL_SUBTYPEOTHER                          0.771826 -0.553
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE                 0.000000     NA
## HISTOLOGICAL_SUBTYPEPHYL                           0.000000     NA
##                                                  Pr(>|z|)    
## INTCLUST10                                       0.037623 *  
## INTCLUST2                                        0.628619    
## INTCLUST3                                        0.278189    
## INTCLUST4ER-                                     0.314083    
## INTCLUST4ER+                                     0.439485    
## INTCLUST5                                        0.009109 ** 
## INTCLUST6                                        0.923468    
## INTCLUST7                                        0.236628    
## INTCLUST8                                        0.590755    
## INTCLUST9                                        0.716918    
## COHORT                                           0.204608    
## AGE_AT_DIAGNOSIS                                  < 2e-16 ***
## LATERALITYnull                                   1.57e-06 ***
## LATERALITYr                                      0.110597    
## NPI                                              9.67e-11 ***
## ER_IHCpos                                        0.104197    
## INFERRED_MENOPAUSAL_STATEpre                     2.83e-05 ***
## BREAST_SURGERYMASTECTOMY                         0.002184 ** 
## BREAST_SURGERYnull                               0.201974    
## CELLULARITYlow                                   0.339786    
## CELLULARITYmoderate                              0.590144    
## CELLULARITYnull                                  0.958091    
## HER2_SNP6LOSS                                    0.714062    
## HER2_SNP6NEUT                                    0.884176    
## HER2_SNP6UNDEF                                   0.741263    
## THREEGENEER+/HER2- High Prolif                   0.906824    
## THREEGENEER+/HER2- Low Prolif                    0.790123    
## THREEGENEHER2+                                   0.060670 .  
## THREEGENEnull                                    0.993882    
## CLAUDIN_SUBTYPEclaudin-low                       0.219875    
## CLAUDIN_SUBTYPEHer2                              0.755984    
## CLAUDIN_SUBTYPELumA                              0.360687    
## CLAUDIN_SUBTYPELumB                              0.935139    
## CLAUDIN_SUBTYPENC                                0.798005    
## CLAUDIN_SUBTYPENormal                            0.741034    
## CHEMOTHERAPYYES                                  0.000503 ***
## HORMONE_THERAPYYES                               0.457285    
## RADIO_THERAPYYES                                 0.327425    
## HISTOLOGICAL_SUBTYPEDCIS                         0.992504    
## HISTOLOGICAL_SUBTYPEIDC                          0.836774    
## HISTOLOGICAL_SUBTYPEIDC-MED                      0.611638    
## HISTOLOGICAL_SUBTYPEIDC-MUC                      0.763555    
## HISTOLOGICAL_SUBTYPEIDC-TUB                      0.849200    
## HISTOLOGICAL_SUBTYPEIDC+ILC                      0.663312    
## HISTOLOGICAL_SUBTYPEILC                          0.519117    
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR              0.447831    
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE 0.341577    
## HISTOLOGICAL_SUBTYPEnull                         0.978601    
## HISTOLOGICAL_SUBTYPEOTHER                        0.580068    
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE                     NA    
## HISTOLOGICAL_SUBTYPEPHYL                               NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                                  exp(coef) exp(-coef)
## INTCLUST10                                        0.670211     1.4921
## INTCLUST2                                         1.096469     0.9120
## INTCLUST3                                         0.840272     1.1901
## INTCLUST4ER-                                      0.793663     1.2600
## INTCLUST4ER+                                      0.879551     1.1369
## INTCLUST5                                         1.736175     0.5760
## INTCLUST6                                         1.017348     0.9829
## INTCLUST7                                         0.823255     1.2147
## INTCLUST8                                         0.921158     1.0856
## INTCLUST9                                         1.060356     0.9431
## COHORT                                            1.036676     0.9646
## AGE_AT_DIAGNOSIS                                  1.054517     0.9483
## LATERALITYnull                                    1.896976     0.5272
## LATERALITYr                                       0.902333     1.1082
## NPI                                               1.262139     0.7923
## ER_IHCpos                                         0.797301     1.2542
## INFERRED_MENOPAUSAL_STATEpre                      1.673458     0.5976
## BREAST_SURGERYMASTECTOMY                          1.283982     0.7788
## BREAST_SURGERYnull                                1.494629     0.6691
## CELLULARITYlow                                    1.116287     0.8958
## CELLULARITYmoderate                               1.037994     0.9634
## CELLULARITYnull                                   1.011100     0.9890
## HER2_SNP6LOSS                                     1.064881     0.9391
## HER2_SNP6NEUT                                     1.015397     0.9848
## HER2_SNP6UNDEF                                    0.821640     1.2171
## THREEGENEER+/HER2- High Prolif                    1.018951     0.9814
## THREEGENEER+/HER2- Low Prolif                     0.957455     1.0444
## THREEGENEHER2+                                    0.664365     1.5052
## THREEGENEnull                                     0.998822     1.0012
## CLAUDIN_SUBTYPEclaudin-low                        0.822523     1.2158
## CLAUDIN_SUBTYPEHer2                               0.951397     1.0511
## CLAUDIN_SUBTYPELumA                               0.854945     1.1697
## CLAUDIN_SUBTYPELumB                               0.986125     1.0141
## CLAUDIN_SUBTYPENC                                 1.130926     0.8842
## CLAUDIN_SUBTYPENormal                             1.065295     0.9387
## CHEMOTHERAPYYES                                   1.482022     0.6748
## HORMONE_THERAPYYES                                0.943985     1.0593
## RADIO_THERAPYYES                                  0.922576     1.0839
## HISTOLOGICAL_SUBTYPEDCIS                          0.001171   853.9499
## HISTOLOGICAL_SUBTYPEIDC                           1.127843     0.8866
## HISTOLOGICAL_SUBTYPEIDC-MED                       0.719816     1.3892
## HISTOLOGICAL_SUBTYPEIDC-MUC                       1.207298     0.8283
## HISTOLOGICAL_SUBTYPEIDC-TUB                       0.889523     1.1242
## HISTOLOGICAL_SUBTYPEIDC+ILC                       1.298790     0.7699
## HISTOLOGICAL_SUBTYPEILC                           1.464566     0.6828
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR               0.555471     1.8003
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE  0.330995     3.0212
## HISTOLOGICAL_SUBTYPEnull                          0.969159     1.0318
## HISTOLOGICAL_SUBTYPEOTHER                         0.652437     1.5327
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE                      NA         NA
## HISTOLOGICAL_SUBTYPEPHYL                                NA         NA
##                                                  lower .95 upper .95
## INTCLUST10                                         0.45959    0.9774
## INTCLUST2                                          0.75495    1.5925
## INTCLUST3                                          0.61350    1.1509
## INTCLUST4ER-                                       0.50610    1.2446
## INTCLUST4ER+                                       0.63525    1.2178
## INTCLUST5                                          1.14691    2.6282
## INTCLUST6                                          0.71627    1.4450
## INTCLUST7                                          0.59655    1.1361
## INTCLUST8                                          0.68287    1.2426
## INTCLUST9                                          0.77245    1.4556
## COHORT                                             0.98056    1.0960
## AGE_AT_DIAGNOSIS                                   1.04633    1.0628
## LATERALITYnull                                     1.46071    2.4635
## LATERALITYr                                        0.79531    1.0238
## NPI                                                1.17622    1.3543
## ER_IHCpos                                          0.60667    1.0478
## INFERRED_MENOPAUSAL_STATEpre                       1.31504    2.1296
## BREAST_SURGERYMASTECTOMY                           1.09425    1.5066
## BREAST_SURGERYnull                                 0.80619    2.7710
## CELLULARITYlow                                     0.89060    1.3992
## CELLULARITYmoderate                                0.90628    1.1888
## CELLULARITYnull                                    0.66986    1.5262
## HER2_SNP6LOSS                                      0.76079    1.4905
## HER2_SNP6NEUT                                      0.82672    1.2471
## HER2_SNP6UNDEF                                     0.25599    2.6371
## THREEGENEER+/HER2- High Prolif                     0.74409    1.3953
## THREEGENEER+/HER2- Low Prolif                      0.69514    1.3188
## THREEGENEHER2+                                     0.43337    1.0185
## THREEGENEnull                                      0.73905    1.3499
## CLAUDIN_SUBTYPEclaudin-low                         0.60199    1.1238
## CLAUDIN_SUBTYPEHer2                                0.69485    1.3027
## CLAUDIN_SUBTYPELumA                                0.61094    1.1964
## CLAUDIN_SUBTYPELumB                                0.70435    1.3806
## CLAUDIN_SUBTYPENC                                  0.44078    2.9016
## CLAUDIN_SUBTYPENormal                              0.73208    1.5502
## CHEMOTHERAPYYES                                    1.18742    1.8497
## HORMONE_THERAPYYES                                 0.81088    1.0989
## RADIO_THERAPYYES                                   0.78516    1.0840
## HISTOLOGICAL_SUBTYPEDCIS                           0.00000       Inf
## HISTOLOGICAL_SUBTYPEIDC                            0.35908    3.5425
## HISTOLOGICAL_SUBTYPEIDC-MED                        0.20233    2.5608
## HISTOLOGICAL_SUBTYPEIDC-MUC                        0.35380    4.1198
## HISTOLOGICAL_SUBTYPEIDC-TUB                        0.26611    2.9734
## HISTOLOGICAL_SUBTYPEIDC+ILC                        0.40029    4.2141
## HISTOLOGICAL_SUBTYPEILC                            0.45913    4.6717
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR                0.12171    2.5351
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE   0.03390    3.2314
## HISTOLOGICAL_SUBTYPEnull                           0.09823    9.5620
## HISTOLOGICAL_SUBTYPEOTHER                          0.14373    2.9615
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE                      NA        NA
## HISTOLOGICAL_SUBTYPEPHYL                                NA        NA
## 
## Concordance= 0.684  (se = 0.008 )
## Rsquare= 0.223   (max possible= 1 )
## Likelihood ratio test= 472.6  on 49 df,   p=<2e-16
## Wald test            = 463.8  on 49 df,   p=<2e-16
## Score (logrank) test = 484.3  on 49 df,   p=<2e-16
# Survival curve implied by the fitted Cox model, drawn via ggfortify.
cox1 <- survfit(formula = coxclinical)
autoplot(object = cox1)

This flags a few items as being significant to survival such as INTCLUST10, INTCLUST5, Age at diagnosis, Laterality being null, NPI, Inferred Menopausal State, Breast surgery type, and Chemotherapy being performed.

Survival Curves for Significant Covariates

Next, survival curves are made for some of the covariates indicated as significant

# Kaplan-Meier curves of overall survival, stratified by chemotherapy (NO/YES).
autoplot(
  survfit(
    Surv(OS_MONTHS, OS_STATUS) ~ CHEMOTHERAPY,
    data = clinical
  )
)

Oddly enough, this shows that those who did not get chemotherapy have a better survival curve than those who did.

# Kaplan-Meier curves of overall survival, stratified by inferred menopausal
# state (pre/post).
autoplot(
  survfit(
    Surv(OS_MONTHS, OS_STATUS) ~ INFERRED_MENOPAUSAL_STATE,
    data = clinical
  )
)

This shows that those in a pre-menopausal state have a better survival curve than those in a post-menopausal state. This probably ties directly into our other variable, age at diagnosis. For this, we will use the mean of 61 to create an under-61/over-61 variable and see what the survival curves look like for each group.

# Split age at diagnosis at 61 (the approximate sample mean) into a two-level
# grouping variable AG: "Under61" for ages below 61, "Over61" otherwise.
# Base R assignment is used so the chunk does not depend on dplyr, which is
# not attached by any of the library() calls in this document.
clinical1 <- clinical
clinical1$AG <- ifelse(clinical1$AGE_AT_DIAGNOSIS < 61, "Under61", "Over61")

# Kaplan-Meier curves of overall survival for the two age groups.
autoplot(survfit(Surv(OS_MONTHS, OS_STATUS) ~ AG, data = clinical1))

This is in agreement with our menopause variable: women who are under 61 tend to have a better survival curve than those who are 61 or older.

# Kaplan-Meier curves of overall survival, stratified by laterality
# (l / r / null).
autoplot(
  survfit(
    Surv(OS_MONTHS, OS_STATUS) ~ LATERALITY,
    data = clinical
  )
)

This actually shows us that which breast the cancer develops in does not appear to significantly change the survival curve. However, the null response seems to be the reason why our cox model shows it was significant. It would be interesting to know if these nulls were that of a cancer spread that didn’t have a known origin location, or just insufficient data.

# Kaplan-Meier curves of overall survival, one curve per integrative cluster.
autoplot(
  survfit(
    Surv(OS_MONTHS, OS_STATUS) ~ INTCLUST,
    data = clinical
  )
)

This shows the many options for the INTCLUST variable and corresponding survival curves. It is difficult to see many of the different options, but from our cox model we were told that 10 and 5 were significant. 10 appears to have a higher survival rate than most, where 5 looks like it may be the lower end curve, but the colors make it hard to distinguish, so we will look at them both vs. the overall survival rate for the rest.

# Kaplan-Meier curves restricted to patients in integrative clusters 5 and 10,
# so the two clusters can be compared head to head.
autoplot(
  survfit(
    Surv(OS_MONTHS, OS_STATUS) ~ INTCLUST,
    data = clinical[clinical$INTCLUST %in% c(5, 10), ]
  )
)

When we compare INTCLUST 5 vs. INTCLUST 10, we see a substantial difference in survival curves. It appears that INTCLUST 10 patients have a much better survival rate than INTCLUST 5 patients.

# Kaplan-Meier curves for integrative cluster 5 versus all other patients
# (the stratum is the logical INTCLUST == 5).
autoplot(
  survfit(
    Surv(OS_MONTHS, OS_STATUS) ~ INTCLUST == 5,
    data = clinical
  )
)

This shows when we compare patients that are positive for integrative cluster 5, their overall survival rate is much lower than the rest of the patients.

# Kaplan-Meier curves for integrative cluster 10 versus all other patients
# (the stratum is the logical INTCLUST == 10).
autoplot(
  survfit(
    Surv(OS_MONTHS, OS_STATUS) ~ INTCLUST == 10,
    data = clinical
  )
)

This shows that if a patient is positive for integrative cluster 10, their overall survival rate at first is slightly lower than the rest, but after a certain amount of time the survival rate is better than average.

# Bin NPI into four groups using its sample quartiles as cut points.
# include.lowest = TRUE keeps the minimum NPI value inside the first bin.
clinical$QNPI <- cut(
  clinical$NPI,
  breaks = quantile(
    clinical$NPI,
    probs = c(0, 0.25, 0.5, 0.75, 1),
    na.rm = TRUE
  ),
  include.lowest = TRUE
)

# Kaplan-Meier curves of overall survival, one curve per NPI quartile.
autoplot(
  survfit(
    Surv(OS_MONTHS, OS_STATUS) ~ QNPI,
    data = clinical
  )
)

By breaking NPI into quartiles, we can see that generally, as you move from Q1 towards Q4, the survival curve decreases for each quartile. This makes sense because the NPI score is calculated from tumor size, number of involved lymph nodes, and grade of the tumor. Higher NPI values seem to correspond to more advanced cancer status.

EXPLORING INTCLUST 5

Since the survival rate is much lower with those who have INTCLUST 5, I decided to subset those individuals and run a cox model on them to see if any of the covariates have a significant impact on those patients.

# Keep only the patients in integrative cluster 5.
L5 <- clinical[which(clinical$INTCLUST == 5), ]

# Refit the Cox model within that subset. INTCLUST (constant here) and
# LATERALITY are left out of the covariate list.
# NOTE(review): several coefficients are reported as NA in the summary below;
# presumably those factor levels have no patients in this subset -- confirm,
# and consider droplevels(L5) if so.
coxclinicalL5 <- coxph(
  Surv(OS_MONTHS, OS_STATUS) ~
    COHORT + AGE_AT_DIAGNOSIS + NPI + ER_IHC + INFERRED_MENOPAUSAL_STATE +
    BREAST_SURGERY + CELLULARITY + HER2_SNP6 + THREEGENE + CLAUDIN_SUBTYPE +
    CHEMOTHERAPY + HORMONE_THERAPY + RADIO_THERAPY + HISTOLOGICAL_SUBTYPE,
  data = L5
)
summary(coxclinicalL5)
## Call:
## coxph(formula = Surv(OS_MONTHS, OS_STATUS) ~ COHORT + AGE_AT_DIAGNOSIS + 
##     NPI + ER_IHC + INFERRED_MENOPAUSAL_STATE + BREAST_SURGERY + 
##     CELLULARITY + HER2_SNP6 + THREEGENE + CLAUDIN_SUBTYPE + CHEMOTHERAPY + 
##     HORMONE_THERAPY + RADIO_THERAPY + HISTOLOGICAL_SUBTYPE, data = L5)
## 
##   n= 180, number of events= 123 
##    (4 observations deleted due to missingness)
## 
##                                                       coef exp(coef)
## COHORT                                           -0.061607  0.940252
## AGE_AT_DIAGNOSIS                                  0.005041  1.005053
## NPI                                               0.492253  1.635998
## ER_IHCpos                                        -0.057880  0.943763
## INFERRED_MENOPAUSAL_STATEpre                     -0.158717  0.853238
## BREAST_SURGERYMASTECTOMY                         -0.090428  0.913540
## BREAST_SURGERYnull                               -0.623352  0.536144
## CELLULARITYlow                                   -0.077823  0.925129
## CELLULARITYmoderate                               0.172181  1.187893
## CELLULARITYnull                                  -0.163305  0.849332
## HER2_SNP6LOSS                                           NA        NA
## HER2_SNP6NEUT                                           NA        NA
## HER2_SNP6UNDEF                                    0.192964  1.212839
## THREEGENEER+/HER2- High Prolif                   -1.454652  0.233482
## THREEGENEER+/HER2- Low Prolif                           NA        NA
## THREEGENEHER2+                                   -1.301742  0.272057
## THREEGENEnull                                    -0.971167  0.378641
## CLAUDIN_SUBTYPEclaudin-low                        0.046830  1.047944
## CLAUDIN_SUBTYPEHer2                               0.349554  1.418434
## CLAUDIN_SUBTYPELumA                               0.937926  2.554676
## CLAUDIN_SUBTYPELumB                               0.966508  2.628748
## CLAUDIN_SUBTYPENC                                       NA        NA
## CLAUDIN_SUBTYPENormal                             1.448824  4.258105
## CHEMOTHERAPYYES                                  -0.127841  0.879994
## HORMONE_THERAPYYES                               -0.905836  0.404204
## RADIO_THERAPYYES                                 -0.068000  0.934260
## HISTOLOGICAL_SUBTYPEDCIS                                NA        NA
## HISTOLOGICAL_SUBTYPEIDC                          -0.744260  0.475086
## HISTOLOGICAL_SUBTYPEIDC-MED                       0.772374  2.164901
## HISTOLOGICAL_SUBTYPEIDC-MUC                             NA        NA
## HISTOLOGICAL_SUBTYPEIDC-TUB                      -0.850790  0.427077
## HISTOLOGICAL_SUBTYPEIDC+ILC                      -0.896735  0.407899
## HISTOLOGICAL_SUBTYPEILC                           0.315274  1.370635
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR                     NA        NA
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE        NA        NA
## HISTOLOGICAL_SUBTYPEnull                                NA        NA
## HISTOLOGICAL_SUBTYPEOTHER                               NA        NA
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE                      NA        NA
## HISTOLOGICAL_SUBTYPEPHYL                                NA        NA
##                                                   se(coef)      z Pr(>|z|)
## COHORT                                            0.096156 -0.641 0.521717
## AGE_AT_DIAGNOSIS                                  0.013242  0.381 0.703460
## NPI                                               0.147883  3.329 0.000873
## ER_IHCpos                                         0.358479 -0.161 0.871730
## INFERRED_MENOPAUSAL_STATEpre                      0.334802 -0.474 0.635455
## BREAST_SURGERYMASTECTOMY                          0.268971 -0.336 0.736720
## BREAST_SURGERYnull                                1.109507 -0.562 0.574233
## CELLULARITYlow                                    0.407842 -0.191 0.848670
## CELLULARITYmoderate                               0.239488  0.719 0.472169
## CELLULARITYnull                                   0.558759 -0.292 0.770085
## HER2_SNP6LOSS                                     0.000000     NA       NA
## HER2_SNP6NEUT                                     0.000000     NA       NA
## HER2_SNP6UNDEF                                    1.327718  0.145 0.884447
## THREEGENEER+/HER2- High Prolif                    0.823217 -1.767 0.077223
## THREEGENEER+/HER2- Low Prolif                     0.000000     NA       NA
## THREEGENEHER2+                                    0.691307 -1.883 0.059698
## THREEGENEnull                                     0.777439 -1.249 0.211596
## CLAUDIN_SUBTYPEclaudin-low                        0.653612  0.072 0.942882
## CLAUDIN_SUBTYPEHer2                               0.443426  0.788 0.430521
## CLAUDIN_SUBTYPELumA                               0.606809  1.546 0.122184
## CLAUDIN_SUBTYPELumB                               0.561971  1.720 0.085459
## CLAUDIN_SUBTYPENC                                 0.000000     NA       NA
## CLAUDIN_SUBTYPENormal                             0.608385  2.381 0.017246
## CHEMOTHERAPYYES                                   0.339974 -0.376 0.706894
## HORMONE_THERAPYYES                                0.280192 -3.233 0.001225
## RADIO_THERAPYYES                                  0.262746 -0.259 0.795784
## HISTOLOGICAL_SUBTYPEDCIS                          0.000000     NA       NA
## HISTOLOGICAL_SUBTYPEIDC                           0.785944 -0.947 0.343657
## HISTOLOGICAL_SUBTYPEIDC-MED                       1.371640  0.563 0.573365
## HISTOLOGICAL_SUBTYPEIDC-MUC                       0.000000     NA       NA
## HISTOLOGICAL_SUBTYPEIDC-TUB                       1.334154 -0.638 0.523669
## HISTOLOGICAL_SUBTYPEIDC+ILC                       1.318650 -0.680 0.496479
## HISTOLOGICAL_SUBTYPEILC                           1.098859  0.287 0.774181
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR               0.000000     NA       NA
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE  0.000000     NA       NA
## HISTOLOGICAL_SUBTYPEnull                          0.000000     NA       NA
## HISTOLOGICAL_SUBTYPEOTHER                         0.000000     NA       NA
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE                0.000000     NA       NA
## HISTOLOGICAL_SUBTYPEPHYL                          0.000000     NA       NA
##                                                     
## COHORT                                              
## AGE_AT_DIAGNOSIS                                    
## NPI                                              ***
## ER_IHCpos                                           
## INFERRED_MENOPAUSAL_STATEpre                        
## BREAST_SURGERYMASTECTOMY                            
## BREAST_SURGERYnull                                  
## CELLULARITYlow                                      
## CELLULARITYmoderate                                 
## CELLULARITYnull                                     
## HER2_SNP6LOSS                                       
## HER2_SNP6NEUT                                       
## HER2_SNP6UNDEF                                      
## THREEGENEER+/HER2- High Prolif                   .  
## THREEGENEER+/HER2- Low Prolif                       
## THREEGENEHER2+                                   .  
## THREEGENEnull                                       
## CLAUDIN_SUBTYPEclaudin-low                          
## CLAUDIN_SUBTYPEHer2                                 
## CLAUDIN_SUBTYPELumA                                 
## CLAUDIN_SUBTYPELumB                              .  
## CLAUDIN_SUBTYPENC                                   
## CLAUDIN_SUBTYPENormal                            *  
## CHEMOTHERAPYYES                                     
## HORMONE_THERAPYYES                               ** 
## RADIO_THERAPYYES                                    
## HISTOLOGICAL_SUBTYPEDCIS                            
## HISTOLOGICAL_SUBTYPEIDC                             
## HISTOLOGICAL_SUBTYPEIDC-MED                         
## HISTOLOGICAL_SUBTYPEIDC-MUC                         
## HISTOLOGICAL_SUBTYPEIDC-TUB                         
## HISTOLOGICAL_SUBTYPEIDC+ILC                         
## HISTOLOGICAL_SUBTYPEILC                             
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR                 
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE    
## HISTOLOGICAL_SUBTYPEnull                            
## HISTOLOGICAL_SUBTYPEOTHER                           
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE                  
## HISTOLOGICAL_SUBTYPEPHYL                            
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                                  exp(coef) exp(-coef)
## COHORT                                              0.9403     1.0635
## AGE_AT_DIAGNOSIS                                    1.0051     0.9950
## NPI                                                 1.6360     0.6112
## ER_IHCpos                                           0.9438     1.0596
## INFERRED_MENOPAUSAL_STATEpre                        0.8532     1.1720
## BREAST_SURGERYMASTECTOMY                            0.9135     1.0946
## BREAST_SURGERYnull                                  0.5361     1.8652
## CELLULARITYlow                                      0.9251     1.0809
## CELLULARITYmoderate                                 1.1879     0.8418
## CELLULARITYnull                                     0.8493     1.1774
## HER2_SNP6LOSS                                           NA         NA
## HER2_SNP6NEUT                                           NA         NA
## HER2_SNP6UNDEF                                      1.2128     0.8245
## THREEGENEER+/HER2- High Prolif                      0.2335     4.2830
## THREEGENEER+/HER2- Low Prolif                           NA         NA
## THREEGENEHER2+                                      0.2721     3.6757
## THREEGENEnull                                       0.3786     2.6410
## CLAUDIN_SUBTYPEclaudin-low                          1.0479     0.9542
## CLAUDIN_SUBTYPEHer2                                 1.4184     0.7050
## CLAUDIN_SUBTYPELumA                                 2.5547     0.3914
## CLAUDIN_SUBTYPELumB                                 2.6287     0.3804
## CLAUDIN_SUBTYPENC                                       NA         NA
## CLAUDIN_SUBTYPENormal                               4.2581     0.2348
## CHEMOTHERAPYYES                                     0.8800     1.1364
## HORMONE_THERAPYYES                                  0.4042     2.4740
## RADIO_THERAPYYES                                    0.9343     1.0704
## HISTOLOGICAL_SUBTYPEDCIS                                NA         NA
## HISTOLOGICAL_SUBTYPEIDC                             0.4751     2.1049
## HISTOLOGICAL_SUBTYPEIDC-MED                         2.1649     0.4619
## HISTOLOGICAL_SUBTYPEIDC-MUC                             NA         NA
## HISTOLOGICAL_SUBTYPEIDC-TUB                         0.4271     2.3415
## HISTOLOGICAL_SUBTYPEIDC+ILC                         0.4079     2.4516
## HISTOLOGICAL_SUBTYPEILC                             1.3706     0.7296
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR                     NA         NA
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE        NA         NA
## HISTOLOGICAL_SUBTYPEnull                                NA         NA
## HISTOLOGICAL_SUBTYPEOTHER                               NA         NA
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE                      NA         NA
## HISTOLOGICAL_SUBTYPEPHYL                                NA         NA
##                                                  lower .95 upper .95
## COHORT                                             0.77875     1.135
## AGE_AT_DIAGNOSIS                                   0.97930     1.031
## NPI                                                1.22435     2.186
## ER_IHCpos                                          0.46744     1.905
## INFERRED_MENOPAUSAL_STATEpre                       0.44268     1.645
## BREAST_SURGERYMASTECTOMY                           0.53924     1.548
## BREAST_SURGERYnull                                 0.06094     4.717
## CELLULARITYlow                                     0.41595     2.058
## CELLULARITYmoderate                                0.74289     1.899
## CELLULARITYnull                                    0.28409     2.539
## HER2_SNP6LOSS                                           NA        NA
## HER2_SNP6NEUT                                           NA        NA
## HER2_SNP6UNDEF                                     0.08988    16.367
## THREEGENEER+/HER2- High Prolif                     0.04651     1.172
## THREEGENEER+/HER2- Low Prolif                           NA        NA
## THREEGENEHER2+                                     0.07018     1.055
## THREEGENEnull                                      0.08250     1.738
## CLAUDIN_SUBTYPEclaudin-low                         0.29106     3.773
## CLAUDIN_SUBTYPEHer2                                0.59479     3.383
## CLAUDIN_SUBTYPELumA                                0.77771     8.392
## CLAUDIN_SUBTYPELumB                                0.87377     7.909
## CLAUDIN_SUBTYPENC                                       NA        NA
## CLAUDIN_SUBTYPENormal                              1.29228    14.031
## CHEMOTHERAPYYES                                    0.45195     1.713
## HORMONE_THERAPYYES                                 0.23340     0.700
## RADIO_THERAPYYES                                   0.55824     1.564
## HISTOLOGICAL_SUBTYPEDCIS                                NA        NA
## HISTOLOGICAL_SUBTYPEIDC                            0.10181     2.217
## HISTOLOGICAL_SUBTYPEIDC-MED                        0.14720    31.841
## HISTOLOGICAL_SUBTYPEIDC-MUC                             NA        NA
## HISTOLOGICAL_SUBTYPEIDC-TUB                        0.03125     5.836
## HISTOLOGICAL_SUBTYPEIDC+ILC                        0.03077     5.407
## HISTOLOGICAL_SUBTYPEILC                            0.15906    11.811
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR                     NA        NA
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE        NA        NA
## HISTOLOGICAL_SUBTYPEnull                                NA        NA
## HISTOLOGICAL_SUBTYPEOTHER                               NA        NA
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE                      NA        NA
## HISTOLOGICAL_SUBTYPEPHYL                                NA        NA
## 
## Concordance= 0.665  (se = 0.026 )
## Rsquare= 0.207   (max possible= 0.998 )
## Likelihood ratio test= 41.64  on 27 df,   p=0.04
## Wald test            = 40.98  on 27 df,   p=0.04
## Score (logrank) test = 45.51  on 27 df,   p=0.01

This shows that NPI, CLAUDIN subtype = Normal, and having undergone hormone therapy are all significant covariates for patients in the INTCLUST 5 group.

# Kaplan-Meier curves for the L5 subset, split into two groups: patients whose
# claudin subtype is "Normal" (TRUE) versus every other subtype (FALSE).
autoplot(
  survfit(
    Surv(OS_MONTHS, OS_STATUS) ~ CLAUDIN_SUBTYPE == "Normal",
    data = L5
  )
)

This seems to show that, among INTCLUST 5 patients, those with claudin subtype = Normal have a lower survival rate than those with any other claudin subtype. However, the sample size for this group is very small, so this claim cannot be considered reliable without more data.

# Kaplan-Meier curves for the L5 subset, one curve per HORMONE_THERAPY value.
autoplot(
  survfit(
    Surv(OS_MONTHS, OS_STATUS) ~ HORMONE_THERAPY,
    data = L5
  )
)

It is interesting that hormone therapy was not a significant predictor of breast cancer survival in our overall model. However, when restricting to INTCLUST 5 patients, those who had hormone therapy had a slightly better survival rate than those who did not.

# Aalen additive regression on the full clinical data set. Unlike the Cox
# model, each covariate gets a coefficient that is allowed to vary over time.
aaregclinical <- aareg(
  Surv(OS_MONTHS, OS_STATUS) ~ INTCLUST + LATERALITY + NPI +
    INFERRED_MENOPAUSAL_STATE + BREAST_SURGERY + CELLULARITY + CHEMOTHERAPY,
  data = clinical
)

# Plot the fitted coefficient curves with the legend suppressed.
autoplot(aaregclinical) +
  theme(legend.position = "none")

This shows us how the estimated effects of the covariates change over time. It is interesting to see the shape of some of the curves, such as those for Breast Surgery = mastectomy and for a patient having had chemotherapy.

EXPLORING INTCLUST10

# Keep only the patients whose integrative cluster (INTCLUST) is 10.
L10 <- subset(clinical, INTCLUST == 10)

# Cox proportional hazards model on the INTCLUST 10 subset, using the clinical
# covariates listed in the formula below.
coxclinicalL10 <- coxph(
  Surv(OS_MONTHS, OS_STATUS) ~ COHORT + AGE_AT_DIAGNOSIS + NPI + ER_IHC +
    INFERRED_MENOPAUSAL_STATE + BREAST_SURGERY + CELLULARITY + HER2_SNP6 +
    THREEGENE + CLAUDIN_SUBTYPE + CHEMOTHERAPY + HORMONE_THERAPY +
    RADIO_THERAPY + HISTOLOGICAL_SUBTYPE,
  data = L10
)
## Warning in fitter(X, Y, strats, offset, init, control, weights = weights, :
## Loglik converged before variable 23,28,29 ; beta may be infinite.
# The warning above means some coefficients may be diverging; treat the rows
# with huge standard errors in the summary below with caution.
summary(coxclinicalL10)
## Call:
## coxph(formula = Surv(OS_MONTHS, OS_STATUS) ~ COHORT + AGE_AT_DIAGNOSIS + 
##     NPI + ER_IHC + INFERRED_MENOPAUSAL_STATE + BREAST_SURGERY + 
##     CELLULARITY + HER2_SNP6 + THREEGENE + CLAUDIN_SUBTYPE + CHEMOTHERAPY + 
##     HORMONE_THERAPY + RADIO_THERAPY + HISTOLOGICAL_SUBTYPE, data = L10)
## 
##   n= 218, number of events= 104 
##    (1 observation deleted due to missingness)
## 
##                                                        coef  exp(coef)
## COHORT                                            6.002e-02  1.062e+00
## AGE_AT_DIAGNOSIS                                  3.220e-02  1.033e+00
## NPI                                               1.689e-01  1.184e+00
## ER_IHCpos                                        -5.056e-01  6.031e-01
## INFERRED_MENOPAUSAL_STATEpre                      2.510e-01  1.285e+00
## BREAST_SURGERYMASTECTOMY                         -6.840e-02  9.339e-01
## BREAST_SURGERYnull                                1.648e+00  5.198e+00
## CELLULARITYlow                                    7.263e-01  2.067e+00
## CELLULARITYmoderate                               1.988e-01  1.220e+00
## CELLULARITYnull                                  -2.438e-01  7.836e-01
## HER2_SNP6LOSS                                    -6.661e-02  9.356e-01
## HER2_SNP6NEUT                                    -2.661e-02  9.737e-01
## HER2_SNP6UNDEF                                           NA         NA
## THREEGENEER+/HER2- High Prolif                    3.809e+00  4.509e+01
## THREEGENEER+/HER2- Low Prolif                            NA         NA
## THREEGENEHER2+                                   -3.564e-01  7.002e-01
## THREEGENEnull                                     8.386e-01  2.313e+00
## CLAUDIN_SUBTYPEclaudin-low                       -3.036e-01  7.381e-01
## CLAUDIN_SUBTYPEHer2                              -1.127e-01  8.934e-01
## CLAUDIN_SUBTYPELumA                              -1.569e+00  2.082e-01
## CLAUDIN_SUBTYPELumB                              -3.574e+00  2.803e-02
## CLAUDIN_SUBTYPENC                                        NA         NA
## CLAUDIN_SUBTYPENormal                            -1.742e+01  2.707e-08
## CHEMOTHERAPYYES                                   3.933e-01  1.482e+00
## HORMONE_THERAPYYES                                3.405e-01  1.406e+00
## RADIO_THERAPYYES                                 -2.716e-01  7.622e-01
## HISTOLOGICAL_SUBTYPEDCIS                                 NA         NA
## HISTOLOGICAL_SUBTYPEIDC                           1.773e+01  4.991e+07
## HISTOLOGICAL_SUBTYPEIDC-MED                       1.672e+01  1.817e+07
## HISTOLOGICAL_SUBTYPEIDC-MUC                              NA         NA
## HISTOLOGICAL_SUBTYPEIDC-TUB                      -5.160e-01  5.969e-01
## HISTOLOGICAL_SUBTYPEIDC+ILC                              NA         NA
## HISTOLOGICAL_SUBTYPEILC                          -2.873e-01  7.503e-01
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR                      NA         NA
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE         NA         NA
## HISTOLOGICAL_SUBTYPEnull                                 NA         NA
## HISTOLOGICAL_SUBTYPEOTHER                                NA         NA
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE                       NA         NA
## HISTOLOGICAL_SUBTYPEPHYL                                 NA         NA
##                                                    se(coef)      z
## COHORT                                            9.889e-02  0.607
## AGE_AT_DIAGNOSIS                                  1.431e-02  2.251
## NPI                                               1.949e-01  0.867
## ER_IHCpos                                         4.660e-01 -1.085
## INFERRED_MENOPAUSAL_STATEpre                      3.783e-01  0.663
## BREAST_SURGERYMASTECTOMY                          2.316e-01 -0.295
## BREAST_SURGERYnull                                5.606e-01  2.940
## CELLULARITYlow                                    3.398e-01  2.138
## CELLULARITYmoderate                               2.705e-01  0.735
## CELLULARITYnull                                   1.035e+00 -0.236
## HER2_SNP6LOSS                                     5.813e-01 -0.115
## HER2_SNP6NEUT                                     3.791e-01 -0.070
## HER2_SNP6UNDEF                                    0.000e+00     NA
## THREEGENEER+/HER2- High Prolif                    1.436e+00  2.652
## THREEGENEER+/HER2- Low Prolif                     0.000e+00     NA
## THREEGENEHER2+                                    1.110e+00 -0.321
## THREEGENEnull                                     2.613e-01  3.209
## CLAUDIN_SUBTYPEclaudin-low                        2.712e-01 -1.120
## CLAUDIN_SUBTYPEHer2                               6.246e-01 -0.180
## CLAUDIN_SUBTYPELumA                               8.664e+03  0.000
## CLAUDIN_SUBTYPELumB                               1.400e+00 -2.553
## CLAUDIN_SUBTYPENC                                 0.000e+00     NA
## CLAUDIN_SUBTYPENormal                             8.223e+03 -0.002
## CHEMOTHERAPYYES                                   3.571e-01  1.101
## HORMONE_THERAPYYES                                2.436e-01  1.398
## RADIO_THERAPYYES                                  2.803e-01 -0.969
## HISTOLOGICAL_SUBTYPEDCIS                          0.000e+00     NA
## HISTOLOGICAL_SUBTYPEIDC                           6.963e+03  0.003
## HISTOLOGICAL_SUBTYPEIDC-MED                       6.963e+03  0.002
## HISTOLOGICAL_SUBTYPEIDC-MUC                       0.000e+00     NA
## HISTOLOGICAL_SUBTYPEIDC-TUB                       9.848e+03  0.000
## HISTOLOGICAL_SUBTYPEIDC+ILC                       0.000e+00     NA
## HISTOLOGICAL_SUBTYPEILC                           8.664e+03  0.000
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR               0.000e+00     NA
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE  0.000e+00     NA
## HISTOLOGICAL_SUBTYPEnull                          0.000e+00     NA
## HISTOLOGICAL_SUBTYPEOTHER                         0.000e+00     NA
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE                0.000e+00     NA
## HISTOLOGICAL_SUBTYPEPHYL                          0.000e+00     NA
##                                                  Pr(>|z|)   
## COHORT                                            0.54391   
## AGE_AT_DIAGNOSIS                                  0.02441 * 
## NPI                                               0.38607   
## ER_IHCpos                                         0.27787   
## INFERRED_MENOPAUSAL_STATEpre                      0.50709   
## BREAST_SURGERYMASTECTOMY                          0.76778   
## BREAST_SURGERYnull                                0.00328 **
## CELLULARITYlow                                    0.03254 * 
## CELLULARITYmoderate                               0.46248   
## CELLULARITYnull                                   0.81377   
## HER2_SNP6LOSS                                     0.90876   
## HER2_SNP6NEUT                                     0.94403   
## HER2_SNP6UNDEF                                         NA   
## THREEGENEER+/HER2- High Prolif                    0.00800 **
## THREEGENEER+/HER2- Low Prolif                          NA   
## THREEGENEHER2+                                    0.74810   
## THREEGENEnull                                     0.00133 **
## CLAUDIN_SUBTYPEclaudin-low                        0.26289   
## CLAUDIN_SUBTYPEHer2                               0.85684   
## CLAUDIN_SUBTYPELumA                               0.99986   
## CLAUDIN_SUBTYPELumB                               0.01067 * 
## CLAUDIN_SUBTYPENC                                      NA   
## CLAUDIN_SUBTYPENormal                             0.99831   
## CHEMOTHERAPYYES                                   0.27073   
## HORMONE_THERAPYYES                                0.16220   
## RADIO_THERAPYYES                                  0.33256   
## HISTOLOGICAL_SUBTYPEDCIS                               NA   
## HISTOLOGICAL_SUBTYPEIDC                           0.99797   
## HISTOLOGICAL_SUBTYPEIDC-MED                       0.99808   
## HISTOLOGICAL_SUBTYPEIDC-MUC                            NA   
## HISTOLOGICAL_SUBTYPEIDC-TUB                       0.99996   
## HISTOLOGICAL_SUBTYPEIDC+ILC                            NA   
## HISTOLOGICAL_SUBTYPEILC                           0.99997   
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR                    NA   
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE       NA   
## HISTOLOGICAL_SUBTYPEnull                               NA   
## HISTOLOGICAL_SUBTYPEOTHER                              NA   
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE                     NA   
## HISTOLOGICAL_SUBTYPEPHYL                               NA   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                                  exp(coef) exp(-coef)
## COHORT                                           1.062e+00  9.417e-01
## AGE_AT_DIAGNOSIS                                 1.033e+00  9.683e-01
## NPI                                              1.184e+00  8.446e-01
## ER_IHCpos                                        6.031e-01  1.658e+00
## INFERRED_MENOPAUSAL_STATEpre                     1.285e+00  7.780e-01
## BREAST_SURGERYMASTECTOMY                         9.339e-01  1.071e+00
## BREAST_SURGERYnull                               5.198e+00  1.924e-01
## CELLULARITYlow                                   2.067e+00  4.837e-01
## CELLULARITYmoderate                              1.220e+00  8.197e-01
## CELLULARITYnull                                  7.836e-01  1.276e+00
## HER2_SNP6LOSS                                    9.356e-01  1.069e+00
## HER2_SNP6NEUT                                    9.737e-01  1.027e+00
## HER2_SNP6UNDEF                                          NA         NA
## THREEGENEER+/HER2- High Prolif                   4.509e+01  2.218e-02
## THREEGENEER+/HER2- Low Prolif                           NA         NA
## THREEGENEHER2+                                   7.002e-01  1.428e+00
## THREEGENEnull                                    2.313e+00  4.323e-01
## CLAUDIN_SUBTYPEclaudin-low                       7.381e-01  1.355e+00
## CLAUDIN_SUBTYPEHer2                              8.934e-01  1.119e+00
## CLAUDIN_SUBTYPELumA                              2.082e-01  4.803e+00
## CLAUDIN_SUBTYPELumB                              2.803e-02  3.568e+01
## CLAUDIN_SUBTYPENC                                       NA         NA
## CLAUDIN_SUBTYPENormal                            2.707e-08  3.694e+07
## CHEMOTHERAPYYES                                  1.482e+00  6.748e-01
## HORMONE_THERAPYYES                               1.406e+00  7.114e-01
## RADIO_THERAPYYES                                 7.622e-01  1.312e+00
## HISTOLOGICAL_SUBTYPEDCIS                                NA         NA
## HISTOLOGICAL_SUBTYPEIDC                          4.991e+07  2.004e-08
## HISTOLOGICAL_SUBTYPEIDC-MED                      1.817e+07  5.504e-08
## HISTOLOGICAL_SUBTYPEIDC-MUC                             NA         NA
## HISTOLOGICAL_SUBTYPEIDC-TUB                      5.969e-01  1.675e+00
## HISTOLOGICAL_SUBTYPEIDC+ILC                             NA         NA
## HISTOLOGICAL_SUBTYPEILC                          7.503e-01  1.333e+00
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR                     NA         NA
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE        NA         NA
## HISTOLOGICAL_SUBTYPEnull                                NA         NA
## HISTOLOGICAL_SUBTYPEOTHER                               NA         NA
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE                      NA         NA
## HISTOLOGICAL_SUBTYPEPHYL                                NA         NA
##                                                  lower .95 upper .95
## COHORT                                            0.874763    1.2890
## AGE_AT_DIAGNOSIS                                  1.004167    1.0621
## NPI                                               0.808123    1.7348
## ER_IHCpos                                         0.241981    1.5033
## INFERRED_MENOPAUSAL_STATEpre                      0.612309    2.6979
## BREAST_SURGERYMASTECTOMY                          0.593086    1.4705
## BREAST_SURGERYnull                                1.732392   15.5978
## CELLULARITYlow                                    1.062255    4.0238
## CELLULARITYmoderate                               0.717886    2.0730
## CELLULARITYnull                                   0.103040    5.9593
## HER2_SNP6LOSS                                     0.299423    2.9232
## HER2_SNP6NEUT                                     0.463196    2.0470
## HER2_SNP6UNDEF                                          NA        NA
## THREEGENEER+/HER2- High Prolif                    2.702484  752.3891
## THREEGENEER+/HER2- Low Prolif                           NA        NA
## THREEGENEHER2+                                    0.079528    6.1645
## THREEGENEnull                                     1.385932    3.8603
## CLAUDIN_SUBTYPEclaudin-low                        0.433814    1.2560
## CLAUDIN_SUBTYPEHer2                               0.262665    3.0390
## CLAUDIN_SUBTYPELumA                               0.000000       Inf
## CLAUDIN_SUBTYPELumB                               0.001803    0.4358
## CLAUDIN_SUBTYPENC                                       NA        NA
## CLAUDIN_SUBTYPENormal                             0.000000       Inf
## CHEMOTHERAPYYES                                   0.735952    2.9838
## HORMONE_THERAPYYES                                0.871999    2.2658
## RADIO_THERAPYYES                                  0.440056    1.3201
## HISTOLOGICAL_SUBTYPEDCIS                                NA        NA
## HISTOLOGICAL_SUBTYPEIDC                           0.000000       Inf
## HISTOLOGICAL_SUBTYPEIDC-MED                       0.000000       Inf
## HISTOLOGICAL_SUBTYPEIDC-MUC                             NA        NA
## HISTOLOGICAL_SUBTYPEIDC-TUB                       0.000000       Inf
## HISTOLOGICAL_SUBTYPEIDC+ILC                             NA        NA
## HISTOLOGICAL_SUBTYPEILC                           0.000000       Inf
## HISTOLOGICAL_SUBTYPEINVASIVE TUMOUR                     NA        NA
## HISTOLOGICAL_SUBTYPEMIXED NST AND A SPECIAL TYPE        NA        NA
## HISTOLOGICAL_SUBTYPEnull                                NA        NA
## HISTOLOGICAL_SUBTYPEOTHER                               NA        NA
## HISTOLOGICAL_SUBTYPEOTHER INVASIVE                      NA        NA
## HISTOLOGICAL_SUBTYPEPHYL                                NA        NA
## 
## Concordance= 0.693  (se = 0.026 )
## Rsquare= 0.225   (max possible= 0.991 )
## Likelihood ratio test= 55.54  on 27 df,   p=0.001
## Wald test            = 47.91  on 27 df,   p=0.008
## Score (logrank) test = 56.38  on 27 df,   p=8e-04

Looking at INTCLUST10 patients, we see that a few more covariates are significant than for INTCLUST5. These include age at diagnosis, Breast surgery = null, Cellularity = low, Threegene = null, Threegene = ER+/HER2- High Prolif, and Claudin subtype = LumB.

# Copy the INTCLUST 10 subset and add an age-group column AG:
# "Under61" when AGE_AT_DIAGNOSIS < 61, otherwise "Over61".
L10AG <- L10
L10AG$AG <- ifelse(L10AG$AGE_AT_DIAGNOSIS < 61, "Under61", "Over61")

# Kaplan-Meier curves for the two age groups.
autoplot(
  survfit(
    Surv(OS_MONTHS, OS_STATUS) ~ AG,
    data = L10AG
  )
)

Again we split the ages into over/under 61. The curves are very similar to start, but they begin to diverge after t=100, after which those under 61 have a better survival curve than those over 61.

# Kaplan-Meier curves for the INTCLUST 10 subset: patients whose THREEGENE
# value is "null" (TRUE) versus all other THREEGENE values (FALSE).
autoplot(
  survfit(
    Surv(OS_MONTHS, OS_STATUS) ~ THREEGENE == "null",
    data = L10
  )
)

This shows that those who had Threegene = null had a lower survival rate than the subjects with any other Threegene response. However, this group again has a fairly small sample, so more data would be needed before coming to this conclusion.

# Kaplan-Meier curves for the INTCLUST 10 subset: patients in the
# "ER+/HER2- High Prolif" THREEGENE class (TRUE) versus all other classes
# (FALSE). This is the THREEGENE level the INTCLUST 10 Cox summary flags as
# significant. The label used previously, "ER-/HER2- High Prolif", is not one
# of the THREEGENE levels listed in that summary, so the comparison could not
# separate the intended group.
# NOTE(review): confirm the exact spelling with levels(L10$THREEGENE).
autoplot(
  survfit(
    Surv(OS_MONTHS, OS_STATUS) ~ THREEGENE == "ER+/HER2- High Prolif",
    data = L10
  )
)

# Kaplan-Meier curves for the INTCLUST 10 subset, one curve per THREEGENE
# class.
autoplot(
  survfit(
    Surv(OS_MONTHS, OS_STATUS) ~ THREEGENE,
    data = L10
  )
)

Looking at ER+/HER2- High Prolif (the Threegene level flagged as significant in the model above) vs. all of the other Threegene responses, it seems like ER+/HER2- High Prolif has a slightly better survival rate at the beginning, but again, with such low volume it is difficult to draw this conclusion.

# Kaplan-Meier curves for the INTCLUST 10 subset: patients whose
# BREAST_SURGERY value is "null" (TRUE) versus all other values (FALSE).
autoplot(
  survfit(
    Surv(OS_MONTHS, OS_STATUS) ~ BREAST_SURGERY == "null",
    data = L10
  )
)

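Again, with low volume no real conclusion can be drawn from this information. It would seem to suggest that those with Breast surgery = null (no surgery type recorded, which may mean either that no surgery was performed or simply that the value is missing) have a much lower survival rate, but that cannot be concluded from the information we have.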

# Kaplan-Meier curves for the INTCLUST 10 subset: patients with
# CELLULARITY "low" (TRUE) versus all other cellularity values (FALSE).
autoplot(
  survfit(
    Surv(OS_MONTHS, OS_STATUS) ~ CELLULARITY == "low",
    data = L10
  )
)

This is another low-volume variable. The plot would suggest that patients with low cellularity have a lower survival curve, but with such low volume that conclusion would need further testing.

# Kaplan-Meier curves for the INTCLUST 10 subset: patients whose claudin
# subtype is "LumB" (TRUE) versus every other subtype (FALSE).
autoplot(
  survfit(
    Surv(OS_MONTHS, OS_STATUS) ~ CLAUDIN_SUBTYPE == "LumB",
    data = L10
  )
)

This would suggest that patients with INTCLUST 10 and Claudin subtype = LumB would have a higher survival curve than those who do not.

SUMMARY

Many of the INTCLUST 5 and INTCLUST 10 results suggest interesting ideas that could form the basis for further exploration, but because of the low volumes in several groups it would be difficult to draw real conclusions from many of the variables within these subsets.

Gene Expression

# Load the gene-expression table from a local RDS file.
gene <- readRDS("C:/Users/Dustin and Morgan/Desktop/gene_expression.rds")

# Put the clinical and gene-expression columns side by side.
# NOTE(review): cbind() pairs rows by position, so this assumes `gene` has its
# rows in the same patient order as `clinical` -- confirm.
geneclinical <- cbind(clinical, gene)

# Cox proportional hazards model of overall survival on the expression of the
# genes listed in the formula below.
geneclincox <- coxph(
  Surv(OS_MONTHS, OS_STATUS) ~ BRCA1 + BRCA2 + CDH1 + PTEN + STK11 + TP53 +
    ATM + BARD1 + CASP8 + CTLA4 + CYP19A1 + FGFR2 + LSP1 + MAP3K1 + NBN +
    RAD51 + TERT,
  data = geneclinical
)
summary(geneclincox)
## Call:
## coxph(formula = Surv(OS_MONTHS, OS_STATUS) ~ BRCA1 + BRCA2 + 
##     CDH1 + PTEN + STK11 + TP53 + ATM + BARD1 + CASP8 + CTLA4 + 
##     CYP19A1 + FGFR2 + LSP1 + MAP3K1 + NBN + RAD51 + TERT, data = geneclinical)
## 
##   n= 1904, number of events= 1103 
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)    
## BRCA1   -0.10850   0.89718  0.22330 -0.486  0.62705    
## BRCA2    0.40503   1.49934  0.08722  4.644 3.42e-06 ***
## CDH1     0.15621   1.16908  0.21986  0.711  0.47739    
## PTEN     0.27725   1.31950  0.05365  5.168 2.36e-07 ***
## STK11    0.19620   1.21677  0.08226  2.385  0.01708 *  
## TP53     0.38638   1.47164  0.18901  2.044  0.04093 *  
## ATM      0.01648   1.01662  0.09562  0.172  0.86313    
## BARD1   -0.10585   0.89956  0.12528 -0.845  0.39817    
## CASP8   -0.17005   0.84362  0.22217 -0.765  0.44403    
## CTLA4    0.13552   1.14513  0.05507  2.461  0.01386 *  
## CYP19A1 -0.07757   0.92536  0.04898 -1.584  0.11330    
## FGFR2    0.01954   1.01973  0.03644  0.536  0.59187    
## LSP1    -0.21961   0.80283  0.07875 -2.789  0.00529 ** 
## MAP3K1   0.05887   1.06064  0.09135  0.644  0.51931    
## NBN      0.19444   1.21463  0.08128  2.392  0.01675 *  
## RAD51    0.07100   1.07358  0.20395  0.348  0.72776    
## TERT    -0.21812   0.80403  0.06634 -3.288  0.00101 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## BRCA1      0.8972     1.1146    0.5792    1.3898
## BRCA2      1.4993     0.6670    1.2637    1.7789
## CDH1       1.1691     0.8554    0.7598    1.7988
## PTEN       1.3195     0.7579    1.1878    1.4658
## STK11      1.2168     0.8218    1.0356    1.4297
## TP53       1.4716     0.6795    1.0161    2.1315
## ATM        1.0166     0.9837    0.8429    1.2262
## BARD1      0.8996     1.1117    0.7037    1.1499
## CASP8      0.8436     1.1854    0.5458    1.3039
## CTLA4      1.1451     0.8733    1.0280    1.2757
## CYP19A1    0.9254     1.0807    0.8407    1.0186
## FGFR2      1.0197     0.9807    0.9494    1.0952
## LSP1       0.8028     1.2456    0.6880    0.9368
## MAP3K1     1.0606     0.9428    0.8868    1.2686
## NBN        1.2146     0.8233    1.0358    1.4244
## RAD51      1.0736     0.9315    0.7198    1.6012
## TERT       0.8040     1.2437    0.7060    0.9157
## 
## Concordance= 0.599  (se = 0.009 )
## Rsquare= 0.045   (max possible= 1 )
## Likelihood ratio test= 88.22  on 17 df,   p=1e-11
## Wald test            = 89.98  on 17 df,   p=6e-12
## Score (logrank) test = 90.1  on 17 df,   p=6e-12

Using a Cox model based on a list of genes that have been found to have an impact on breast cancer (source: https://www.nationalbreastcancer.org/other-breast-cancer-genes), our model says that the most significant genes are BRCA2, PTEN, STK11, TP53, CTLA4, LSP1, NBN, and TERT. (ATM is also carried into the next model, although with p = 0.86 it is not significant in this one.)

Making a model with significant genes and covariates from the clinical data set

# Cox model combining nine of the genes from the gene-only model with
# clinical covariates from the clinical data set.
geneclincox2 <- coxph(
  Surv(OS_MONTHS, OS_STATUS) ~
    BRCA2 + PTEN + STK11 + TP53 + ATM + CTLA4 + LSP1 + NBN + TERT +
    INTCLUST + LATERALITY + NPI + INFERRED_MENOPAUSAL_STATE +
    BREAST_SURGERY + AGE_AT_DIAGNOSIS + CHEMOTHERAPY,
  data = geneclinical
)
summary(geneclincox2)
## Call:
## coxph(formula = Surv(OS_MONTHS, OS_STATUS) ~ BRCA2 + PTEN + STK11 + 
##     TP53 + ATM + CTLA4 + LSP1 + NBN + TERT + INTCLUST + LATERALITY + 
##     NPI + INFERRED_MENOPAUSAL_STATE + BREAST_SURGERY + AGE_AT_DIAGNOSIS + 
##     CHEMOTHERAPY, data = geneclinical)
## 
##   n= 1904, number of events= 1103 
## 
##                                   coef exp(coef)  se(coef)      z Pr(>|z|)
## BRCA2                         0.112928  1.119551  0.092206  1.225 0.220675
## PTEN                          0.239694  1.270861  0.066874  3.584 0.000338
## STK11                         0.120160  1.127677  0.080362  1.495 0.134853
## TP53                          0.415365  1.514923  0.188029  2.209 0.027171
## ATM                           0.170071  1.185389  0.097170  1.750 0.080075
## CTLA4                         0.153601  1.166026  0.055884  2.749 0.005986
## LSP1                         -0.261818  0.769651  0.080128 -3.267 0.001085
## NBN                           0.203465  1.225642  0.082080  2.479 0.013180
## TERT                         -0.203051  0.816236  0.067096 -3.026 0.002476
## INTCLUST10                   -0.613731  0.541327  0.175168 -3.504 0.000459
## INTCLUST2                     0.102642  1.108094  0.188927  0.543 0.586931
## INTCLUST3                    -0.247156  0.781019  0.158267 -1.562 0.118374
## INTCLUST4ER-                 -0.258340  0.772332  0.210216 -1.229 0.219098
## INTCLUST4ER+                 -0.204196  0.815303  0.167388 -1.220 0.222504
## INTCLUST5                     0.068813  1.071236  0.158224  0.435 0.663630
## INTCLUST6                    -0.083801  0.919614  0.180701 -0.464 0.642822
## INTCLUST7                    -0.259875  0.771148  0.164251 -1.582 0.113607
## INTCLUST8                    -0.131000  0.877218  0.150574 -0.870 0.384300
## INTCLUST9                    -0.105584  0.899799  0.162356 -0.650 0.515483
## LATERALITYnull                0.639837  1.896171  0.128277  4.988 6.10e-07
## LATERALITYr                  -0.058580  0.943103  0.063723 -0.919 0.357942
## NPI                           0.206454  1.229312  0.031896  6.473 9.62e-11
## INFERRED_MENOPAUSAL_STATEpre  0.510982  1.666928  0.120274  4.248 2.15e-05
## BREAST_SURGERYMASTECTOMY      0.299936  1.349773  0.066680  4.498 6.85e-06
## BREAST_SURGERYnull            0.512199  1.668957  0.295466  1.734 0.083002
## AGE_AT_DIAGNOSIS              0.052767  1.054184  0.003881 13.598  < 2e-16
## CHEMOTHERAPYYES               0.402654  1.495789  0.100704  3.998 6.38e-05
##                                 
## BRCA2                           
## PTEN                         ***
## STK11                           
## TP53                         *  
## ATM                          .  
## CTLA4                        ** 
## LSP1                         ** 
## NBN                          *  
## TERT                         ** 
## INTCLUST10                   ***
## INTCLUST2                       
## INTCLUST3                       
## INTCLUST4ER-                    
## INTCLUST4ER+                    
## INTCLUST5                       
## INTCLUST6                       
## INTCLUST7                       
## INTCLUST8                       
## INTCLUST9                       
## LATERALITYnull               ***
## LATERALITYr                     
## NPI                          ***
## INFERRED_MENOPAUSAL_STATEpre ***
## BREAST_SURGERYMASTECTOMY     ***
## BREAST_SURGERYnull           .  
## AGE_AT_DIAGNOSIS             ***
## CHEMOTHERAPYYES              ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                              exp(coef) exp(-coef) lower .95 upper .95
## BRCA2                           1.1196     0.8932    0.9345    1.3413
## PTEN                            1.2709     0.7869    1.1147    1.4488
## STK11                           1.1277     0.8868    0.9633    1.3200
## TP53                            1.5149     0.6601    1.0479    2.1900
## ATM                             1.1854     0.8436    0.9798    1.4341
## CTLA4                           1.1660     0.8576    1.0451    1.3010
## LSP1                            0.7697     1.2993    0.6578    0.9005
## NBN                             1.2256     0.8159    1.0435    1.4396
## TERT                            0.8162     1.2251    0.7157    0.9310
## INTCLUST10                      0.5413     1.8473    0.3840    0.7631
## INTCLUST2                       1.1081     0.9025    0.7652    1.6047
## INTCLUST3                       0.7810     1.2804    0.5727    1.0651
## INTCLUST4ER-                    0.7723     1.2948    0.5115    1.1661
## INTCLUST4ER+                    0.8153     1.2265    0.5873    1.1319
## INTCLUST5                       1.0712     0.9335    0.7856    1.4607
## INTCLUST6                       0.9196     1.0874    0.6453    1.3104
## INTCLUST7                       0.7711     1.2968    0.5589    1.0640
## INTCLUST8                       0.8772     1.1400    0.6530    1.1784
## INTCLUST9                       0.8998     1.1114    0.6546    1.2369
## LATERALITYnull                  1.8962     0.5274    1.4746    2.4382
## LATERALITYr                     0.9431     1.0603    0.8324    1.0686
## NPI                             1.2293     0.8135    1.1548    1.3086
## INFERRED_MENOPAUSAL_STATEpre    1.6669     0.5999    1.3169    2.1101
## BREAST_SURGERYMASTECTOMY        1.3498     0.7409    1.1844    1.5382
## BREAST_SURGERYnull              1.6690     0.5992    0.9353    2.9781
## AGE_AT_DIAGNOSIS                1.0542     0.9486    1.0462    1.0622
## CHEMOTHERAPYYES                 1.4958     0.6685    1.2279    1.8222
## 
## Concordance= 0.684  (se = 0.009 )
## Rsquare= 0.225   (max possible= 1 )
## Likelihood ratio test= 484.6  on 27 df,   p=<2e-16
## Wald test            = 476.2  on 27 df,   p=<2e-16
## Score (logrank) test = 493.3  on 27 df,   p=<2e-16

This model shows a number of significant covariates, including genes such as PTEN, TP53, CTLA4, LSP1, NBN, TERT along with INTCLUST10, LATERALITY = null, NPI, Inferred menopausal state = pre menopausal, having a mastectomy, age at diagnosis, and chemotherapy yes. Another model will be fit with this and we will check with AIC to see which has the better fit.

# Reduced model: the same clinical covariates as geneclincox2, but keeping
# only six genes (BRCA2, STK11 and ATM are dropped).
geneclincox3 <- coxph(
  Surv(OS_MONTHS, OS_STATUS) ~
    PTEN + TP53 + CTLA4 + LSP1 + NBN + TERT +
    INTCLUST + LATERALITY + NPI + INFERRED_MENOPAUSAL_STATE +
    BREAST_SURGERY + AGE_AT_DIAGNOSIS + CHEMOTHERAPY,
  data = geneclinical
)

# Compare the larger and the reduced model by AIC (lower is better).
AIC(
  geneclincox2,
  geneclincox3
)
##              df      AIC
## geneclincox2 27 14647.34
## geneclincox3 24 14648.57

Interestingly, the AIC comparison shows that the model with all our covariates and genes is slightly better at explaining our data than the model restricted to the significant covariates. This would suggest that arbitrarily removing variables from our model is not a good thing. We can compare both to our original model as well.

# AIC for the gene-only model and the two gene + clinical models
# (lower is better).
AIC(
  geneclincox,
  geneclincox2,
  geneclincox3
)
##              df      AIC
## geneclincox  17 15023.72
## geneclincox2 27 14647.34
## geneclincox3 24 14648.57

This shows that both of the models that combine our gene data set with our clinical data set are better at explaining our data than the model that does not include the clinical information. This seems to suggest that a combination of physical issues as well as genetic issues is needed to explain survival of breast cancer patients, which clinically could be useful. This would help to explain to researchers that focusing only on genetic or only on physical characteristics would not give the whole story when investigating this topic.

EXPLORING INTCLUST 10 WITH GENE DATA SET

When our gene data set was added to our clinical data set, we saw that INTCLUST 10 was the only INTCLUST level that was significant. With this we can now look at the genes within patients limited to INTCLUST 10 and see which may have an impact.

# Keep only the patients whose INTCLUST value is 10.
G10 <- subset(geneclinical, INTCLUST == 10)

# Cox model on that subset: all 17 genes plus the clinical covariates
# (INTCLUST itself is left out of the formula).
genecoxclinicalL10 <- coxph(
  Surv(OS_MONTHS, OS_STATUS) ~
    BRCA1 + BRCA2 + CDH1 + PTEN + STK11 + TP53 + ATM + BARD1 + CASP8 +
    CTLA4 + CYP19A1 + FGFR2 + LSP1 + MAP3K1 + NBN + RAD51 + TERT +
    LATERALITY + NPI + INFERRED_MENOPAUSAL_STATE + BREAST_SURGERY +
    AGE_AT_DIAGNOSIS + CHEMOTHERAPY,
  data = G10
)
summary(genecoxclinicalL10)
## Call:
## coxph(formula = Surv(OS_MONTHS, OS_STATUS) ~ BRCA1 + BRCA2 + 
##     CDH1 + PTEN + STK11 + TP53 + ATM + BARD1 + CASP8 + CTLA4 + 
##     CYP19A1 + FGFR2 + LSP1 + MAP3K1 + NBN + RAD51 + TERT + LATERALITY + 
##     NPI + INFERRED_MENOPAUSAL_STATE + BREAST_SURGERY + AGE_AT_DIAGNOSIS + 
##     CHEMOTHERAPY, data = G10)
## 
##   n= 219, number of events= 104 
## 
##                                   coef exp(coef)  se(coef)      z Pr(>|z|)
## BRCA1                        -0.028281  0.972115  0.846902 -0.033  0.97336
## BRCA2                         0.294597  1.342585  0.294755  0.999  0.31757
## CDH1                          0.479831  1.615801  0.815985  0.588  0.55651
## PTEN                          0.144698  1.155691  0.198833  0.728  0.46677
## STK11                         0.346679  1.414362  0.256884  1.350  0.17716
## TP53                          0.025830  1.026167  0.584406  0.044  0.96475
## ATM                           0.105604  1.111381  0.258763  0.408  0.68319
## BARD1                        -0.037027  0.963650  0.363144 -0.102  0.91879
## CASP8                        -0.730440  0.481697  0.753662 -0.969  0.33245
## CTLA4                        -0.032703  0.967825  0.157262 -0.208  0.83526
## CYP19A1                      -0.001335  0.998666  0.152202 -0.009  0.99300
## FGFR2                        -0.074534  0.928176  0.129162 -0.577  0.56390
## LSP1                         -0.251084  0.777957  0.244325 -1.028  0.30411
## MAP3K1                       -0.492720  0.610962  0.386783 -1.274  0.20270
## NBN                           0.257665  1.293905  0.258267  0.998  0.31844
## RAD51                        -0.197629  0.820674  0.584996 -0.338  0.73549
## TERT                         -0.246571  0.781476  0.260270 -0.947  0.34345
## LATERALITYnull                1.029540  2.799778  0.394833  2.608  0.00912
## LATERALITYr                   0.128532  1.137157  0.227770  0.564  0.57255
## NPI                           0.216408  1.241609  0.185431  1.167  0.24319
## INFERRED_MENOPAUSAL_STATEpre -0.008909  0.991131  0.378064 -0.024  0.98120
## BREAST_SURGERYMASTECTOMY      0.196144  1.216702  0.224739  0.873  0.38279
## BREAST_SURGERYnull            1.032426  2.807868  0.655793  1.574  0.11541
## AGE_AT_DIAGNOSIS              0.024687  1.024994  0.015290  1.615  0.10639
## CHEMOTHERAPYYES               0.340553  1.405725  0.301912  1.128  0.25932
##                                
## BRCA1                          
## BRCA2                          
## CDH1                           
## PTEN                           
## STK11                          
## TP53                           
## ATM                            
## BARD1                          
## CASP8                          
## CTLA4                          
## CYP19A1                        
## FGFR2                          
## LSP1                           
## MAP3K1                         
## NBN                            
## RAD51                          
## TERT                           
## LATERALITYnull               **
## LATERALITYr                    
## NPI                            
## INFERRED_MENOPAUSAL_STATEpre   
## BREAST_SURGERYMASTECTOMY       
## BREAST_SURGERYnull             
## AGE_AT_DIAGNOSIS               
## CHEMOTHERAPYYES                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                              exp(coef) exp(-coef) lower .95 upper .95
## BRCA1                           0.9721     1.0287    0.1849     5.112
## BRCA2                           1.3426     0.7448    0.7534     2.392
## CDH1                            1.6158     0.6189    0.3265     7.998
## PTEN                            1.1557     0.8653    0.7827     1.706
## STK11                           1.4144     0.7070    0.8549     2.340
## TP53                            1.0262     0.9745    0.3264     3.226
## ATM                             1.1114     0.8998    0.6693     1.846
## BARD1                           0.9637     1.0377    0.4729     1.963
## CASP8                           0.4817     2.0760    0.1100     2.110
## CTLA4                           0.9678     1.0332    0.7111     1.317
## CYP19A1                         0.9987     1.0013    0.7411     1.346
## FGFR2                           0.9282     1.0774    0.7206     1.196
## LSP1                            0.7780     1.2854    0.4819     1.256
## MAP3K1                          0.6110     1.6368    0.2863     1.304
## NBN                             1.2939     0.7729    0.7799     2.147
## RAD51                           0.8207     1.2185    0.2607     2.583
## TERT                            0.7815     1.2796    0.4692     1.302
## LATERALITYnull                  2.7998     0.3572    1.2913     6.070
## LATERALITYr                     1.1372     0.8794    0.7277     1.777
## NPI                             1.2416     0.8054    0.8633     1.786
## INFERRED_MENOPAUSAL_STATEpre    0.9911     1.0089    0.4724     2.079
## BREAST_SURGERYMASTECTOMY        1.2167     0.8219    0.7832     1.890
## BREAST_SURGERYnull              2.8079     0.3561    0.7765    10.153
## AGE_AT_DIAGNOSIS                1.0250     0.9756    0.9947     1.056
## CHEMOTHERAPYYES                 1.4057     0.7114    0.7779     2.540
## 
## Concordance= 0.662  (se = 0.027 )
## Rsquare= 0.163   (max possible= 0.99 )
## Likelihood ratio test= 38.85  on 25 df,   p=0.04
## Wald test            = 43.8  on 25 df,   p=0.01
## Score (logrank) test = 50.32  on 25 df,   p=0.002

Oddly enough, this shows that no genes are significant, and laterality = null is the only significant variable. I will attempt this again, removing the clinical data set covariates and only looking at genes.

# Gene-only Cox model fitted to the G10 data (no clinical covariates).
genecoxclinicalL102 <- coxph(
  Surv(OS_MONTHS, OS_STATUS) ~
    BRCA1 + BRCA2 + CDH1 + PTEN + STK11 + TP53 + ATM + BARD1 + CASP8 +
    CTLA4 + CYP19A1 + FGFR2 + LSP1 + MAP3K1 + NBN + RAD51 + TERT,
  data = G10
)
summary(genecoxclinicalL102)
## Call:
## coxph(formula = Surv(OS_MONTHS, OS_STATUS) ~ BRCA1 + BRCA2 + 
##     CDH1 + PTEN + STK11 + TP53 + ATM + BARD1 + CASP8 + CTLA4 + 
##     CYP19A1 + FGFR2 + LSP1 + MAP3K1 + NBN + RAD51 + TERT, data = G10)
## 
##   n= 219, number of events= 104 
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)  
## BRCA1    0.08984   1.09400  0.82256  0.109   0.9130  
## BRCA2    0.41490   1.51421  0.27534  1.507   0.1318  
## CDH1     0.05918   1.06096  0.79621  0.074   0.9408  
## PTEN     0.14252   1.15318  0.19215  0.742   0.4583  
## STK11    0.43541   1.54560  0.24991  1.742   0.0815 .
## TP53     0.31930   1.37616  0.57077  0.559   0.5759  
## ATM      0.11560   1.12254  0.23840  0.485   0.6278  
## BARD1   -0.40478   0.66713  0.34754 -1.165   0.2441  
## CASP8   -0.51568   0.59709  0.73941 -0.697   0.4855  
## CTLA4   -0.04635   0.95471  0.15271 -0.304   0.7615  
## CYP19A1 -0.13210   0.87626  0.14847 -0.890   0.3736  
## FGFR2   -0.03510   0.96551  0.12826 -0.274   0.7843  
## LSP1    -0.13136   0.87690  0.23994 -0.547   0.5841  
## MAP3K1  -0.56580   0.56790  0.36443 -1.553   0.1205  
## NBN      0.32584   1.38520  0.24493  1.330   0.1834  
## RAD51    0.06416   1.06627  0.55446  0.116   0.9079  
## TERT    -0.42513   0.65369  0.25350 -1.677   0.0935 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## BRCA1      1.0940     0.9141    0.2182     5.485
## BRCA2      1.5142     0.6604    0.8827     2.598
## CDH1       1.0610     0.9425    0.2228     5.052
## PTEN       1.1532     0.8672    0.7913     1.681
## STK11      1.5456     0.6470    0.9471     2.522
## TP53       1.3762     0.7267    0.4496     4.212
## ATM        1.1225     0.8908    0.7035     1.791
## BARD1      0.6671     1.4990    0.3376     1.318
## CASP8      0.5971     1.6748    0.1402     2.543
## CTLA4      0.9547     1.0474    0.7078     1.288
## CYP19A1    0.8763     1.1412    0.6550     1.172
## FGFR2      0.9655     1.0357    0.7509     1.241
## LSP1       0.8769     1.1404    0.5479     1.403
## MAP3K1     0.5679     1.7609    0.2780     1.160
## NBN        1.3852     0.7219    0.8571     2.239
## RAD51      1.0663     0.9379    0.3597     3.161
## TERT       0.6537     1.5298    0.3977     1.074
## 
## Concordance= 0.607  (se = 0.031 )
## Rsquare= 0.073   (max possible= 0.99 )
## Likelihood ratio test= 16.58  on 17 df,   p=0.5
## Wald test            = 17.26  on 17 df,   p=0.4
## Score (logrank) test = 16.99  on 17 df,   p=0.5

This again shows that no genes are significant in survival rates with patients with intclust 10. Taking from what we saw before, maybe there is an area for exploration as to why intclust 10 seems to have a higher survival curve than most other intclust patients, and why the genes associated with breast cancer seem to not play a significant role with these patients.

LOOKING AT ALL INTCLUST PATIENTS BUT INTCLUST10

# Fit the gene + clinical Cox model on every patient NOT in INTCLUST 10.
#
# The filter was previously written `INTCLUST =! 10`. R parses that as the
# named argument `INTCLUST = !10`, not as a comparison, so subset() received
# no row condition and returned every row (the knitted output reports
# n = 1904, the full cohort). `!=` is the inequality operator.
#
# NOTE: the summary printed below this chunk was generated before this fix
# and must be regenerated by re-knitting the document.
Gn10 <- subset(geneclinical, INTCLUST != 10)

# Same formula as the INTCLUST 10 model: 17 genes plus clinical covariates.
genecoxclinicalnL10 <- coxph(
  Surv(OS_MONTHS, OS_STATUS) ~
    BRCA1 + BRCA2 + CDH1 + PTEN + STK11 + TP53 + ATM + BARD1 + CASP8 +
    CTLA4 + CYP19A1 + FGFR2 + LSP1 + MAP3K1 + NBN + RAD51 + TERT +
    LATERALITY + NPI + INFERRED_MENOPAUSAL_STATE + BREAST_SURGERY +
    AGE_AT_DIAGNOSIS + CHEMOTHERAPY,
  data = Gn10
)
summary(genecoxclinicalnL10)
## Call:
## coxph(formula = Surv(OS_MONTHS, OS_STATUS) ~ BRCA1 + BRCA2 + 
##     CDH1 + PTEN + STK11 + TP53 + ATM + BARD1 + CASP8 + CTLA4 + 
##     CYP19A1 + FGFR2 + LSP1 + MAP3K1 + NBN + RAD51 + TERT + LATERALITY + 
##     NPI + INFERRED_MENOPAUSAL_STATE + BREAST_SURGERY + AGE_AT_DIAGNOSIS + 
##     CHEMOTHERAPY, data = Gn10)
## 
##   n= 1904, number of events= 1103 
## 
##                                   coef exp(coef)  se(coef)      z Pr(>|z|)
## BRCA1                        -0.091805  0.912283  0.223277 -0.411 0.680947
## BRCA2                         0.200737  1.222303  0.088790  2.261 0.023772
## CDH1                          0.189967  1.209209  0.217097  0.875 0.381558
## PTEN                          0.188517  1.207458  0.056694  3.325 0.000884
## STK11                         0.191844  1.211482  0.082760  2.318 0.020445
## TP53                          0.333706  1.396133  0.188175  1.773 0.076165
## ATM                           0.110259  1.116567  0.093972  1.173 0.240667
## BARD1                        -0.156496  0.855135  0.124690 -1.255 0.209449
## CASP8                        -0.248624  0.779873  0.225689 -1.102 0.270625
## CTLA4                         0.133297  1.142589  0.054824  2.431 0.015042
## CYP19A1                      -0.033263  0.967284  0.048323 -0.688 0.491230
## FGFR2                         0.061420  1.063345  0.036370  1.689 0.091263
## LSP1                         -0.281922  0.754333  0.079109 -3.564 0.000366
## MAP3K1                       -0.017227  0.982921  0.088798 -0.194 0.846177
## NBN                           0.189548  1.208703  0.081095  2.337 0.019420
## RAD51                        -0.053060  0.948323  0.202165 -0.262 0.792969
## TERT                         -0.150728  0.860082  0.066923 -2.252 0.024305
## LATERALITYnull                0.603814  1.829081  0.129596  4.659 3.17e-06
## LATERALITYr                  -0.074757  0.927969  0.063113 -1.185 0.236213
## NPI                           0.226434  1.254119  0.031596  7.167 7.69e-13
## INFERRED_MENOPAUSAL_STATEpre  0.460658  1.585117  0.120118  3.835 0.000126
## BREAST_SURGERYMASTECTOMY      0.310213  1.363715  0.066185  4.687 2.77e-06
## BREAST_SURGERYnull            0.660326  1.935423  0.293005  2.254 0.024219
## AGE_AT_DIAGNOSIS              0.052394  1.053791  0.003879 13.509  < 2e-16
## CHEMOTHERAPYYES               0.386462  1.471765  0.099187  3.896 9.77e-05
##                                 
## BRCA1                           
## BRCA2                        *  
## CDH1                            
## PTEN                         ***
## STK11                        *  
## TP53                         .  
## ATM                             
## BARD1                           
## CASP8                           
## CTLA4                        *  
## CYP19A1                         
## FGFR2                        .  
## LSP1                         ***
## MAP3K1                          
## NBN                          *  
## RAD51                           
## TERT                         *  
## LATERALITYnull               ***
## LATERALITYr                     
## NPI                          ***
## INFERRED_MENOPAUSAL_STATEpre ***
## BREAST_SURGERYMASTECTOMY     ***
## BREAST_SURGERYnull           *  
## AGE_AT_DIAGNOSIS             ***
## CHEMOTHERAPYYES              ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                              exp(coef) exp(-coef) lower .95 upper .95
## BRCA1                           0.9123     1.0962    0.5889    1.4131
## BRCA2                           1.2223     0.8181    1.0271    1.4546
## CDH1                            1.2092     0.8270    0.7901    1.8505
## PTEN                            1.2075     0.8282    1.0805    1.3494
## STK11                           1.2115     0.8254    1.0301    1.4248
## TP53                            1.3961     0.7163    0.9655    2.0188
## ATM                             1.1166     0.8956    0.9287    1.3424
## BARD1                           0.8551     1.1694    0.6697    1.0919
## CASP8                           0.7799     1.2823    0.5011    1.2138
## CTLA4                           1.1426     0.8752    1.0262    1.2722
## CYP19A1                         0.9673     1.0338    0.8799    1.0634
## FGFR2                           1.0633     0.9404    0.9902    1.1419
## LSP1                            0.7543     1.3257    0.6460    0.8808
## MAP3K1                          0.9829     1.0174    0.8259    1.1698
## NBN                             1.2087     0.8273    1.0311    1.4169
## RAD51                           0.9483     1.0545    0.6381    1.4094
## TERT                            0.8601     1.1627    0.7544    0.9806
## LATERALITYnull                  1.8291     0.5467    1.4188    2.3580
## LATERALITYr                     0.9280     1.0776    0.8200    1.0502
## NPI                             1.2541     0.7974    1.1788    1.3342
## INFERRED_MENOPAUSAL_STATEpre    1.5851     0.6309    1.2526    2.0059
## BREAST_SURGERYMASTECTOMY        1.3637     0.7333    1.1978    1.5526
## BREAST_SURGERYnull              1.9354     0.5167    1.0899    3.4370
## AGE_AT_DIAGNOSIS                1.0538     0.9490    1.0458    1.0618
## CHEMOTHERAPYYES                 1.4718     0.6795    1.2117    1.7876
## 
## Concordance= 0.678  (se = 0.008 )
## Rsquare= 0.215   (max possible= 1 )
## Likelihood ratio test= 460  on 25 df,   p=<2e-16
## Wald test            = 450.5  on 25 df,   p=<2e-16
## Score (logrank) test = 466.7  on 25 df,   p=<2e-16

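In contrast to the INTCLUST 10 subset, for all other patients who do not have INTCLUST 10, we see a number of significant genes including BRCA2, PTEN, STK11, CTLA4, LSP1, NBN, and TERT. Note, however, that the output above reports n = 1904, which is the full cohort, so the INTCLUST 10 patients were not actually excluded from this fit; the comparison should be re-checked on a correctly filtered subset. This could be useful information to test to see why INTCLUST 10 does not seem to have their survival rates affected by certain genes, but the others do. Possibly this could be used as a future area of study for these types of patients.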