Load packages

Load survival analysis packages

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(survival)   
library(survminer)
## Loading required package: ggplot2
## Loading required package: ggpubr

Load the packages used to extract ovarian cancer information from TCGA

library(RTCGA)
## Welcome to the RTCGA (version: 1.16.0).
library(RTCGA.clinical)

Create the survival analysis data set

# Build the survival table from the TCGA ovarian-cancer clinical data,
# carrying four additional clinical columns along for later analysis.
clin <- survivalTCGA(
  OV.clinical,
  extract.cols = c(
    "patient.age_at_initial_pathologic_diagnosis",
    "patient.race",
    "patient.tumor_tissue_site",
    "patient.lymphatic_invasion"
  )
)
## Warning: `mutate_()` was deprecated in dplyr 0.7.0.
## Please use `mutate()` instead.
## See vignette('programming') for more help
# Preview the first six rows of the survival table.
head(clin, n = 6L)
##           times bcr_patient_barcode patient.vital_status
## 12.81.0    1224        TCGA-04-1331                    1
## 12.81.0.1  1247        TCGA-04-1332                    1
## 12.81.0.2    55        TCGA-04-1335                    1
## 12.81.0.3  1495        TCGA-04-1336                    0
## 12.81.0.4    61        TCGA-04-1337                    1
## 12.81.0.5  1418        TCGA-04-1338                    0
##           patient.age_at_initial_pathologic_diagnosis              patient.race
## 12.81.0                                            78                     white
## 12.81.0.1                                          70                     white
## 12.81.0.2                                          60 black or african american
## 12.81.0.3                                          55                     white
## 12.81.0.4                                          78                     white
## 12.81.0.5                                          78                     white
##           patient.tumor_tissue_site patient.lymphatic_invasion
## 12.81.0                       ovary                        yes
## 12.81.0.1                     ovary                         no
## 12.81.0.2                     ovary                         no
## 12.81.0.3                     ovary                         no
## 12.81.0.4                     ovary                         no
## 12.81.0.5                     ovary                       <NA>

Basic analysis on the clinical data

# Horizontal bar chart of patient counts per race, with missing values
# shown as their own bar. The wide left margin leaves room for the long
# race labels, which are drawn perpendicular to the axis (las = 2).
par(mar = c(4, 18, 2, 2))
race_counts <- table(clin$patient.race, useNA = "always")
race_labels <- c(names(table(clin$patient.race)), "NA")
barplot(
  race_counts,
  col = c("green", "light green", "pink", "cyan", "blue"),
  horiz = TRUE,
  las = 2,
  names.arg = race_labels,
  main = "Number of OV patients for each race"
)

# Switch to compact plot margins.
par(mar = c(4, 4, 4, 2))
# Patient counts by vital status. The labels are applied in the table's
# sorted order (status 0 first, then 1), so 0 is shown as "Alive" and 1 as
# "Death".
# `names.arg` is spelled out in full instead of relying on partial argument
# matching of `names`.
barplot(
  table(clin$patient.vital_status),
  names.arg = c("Alive", "Death"),
  main = "Number of patients for Alive/Death"
)

# Patient counts by tumor tissue site.
barplot(
  table(clin$patient.tumor_tissue_site),
  main = "Number of patients for each tumor sites"
)

# Patient counts by lymphatic-invasion status; missing values get their own
# bar. Labels are applied in table order, with the NA bar last.
barplot(
  table(clin$patient.lymphatic_invasion, useNA = "always"),
  names.arg = c("NO", "YES", "NA"),
  main = "Number of patients with lymphatic invasion or not"
)

# Patient counts for each recorded age at initial pathologic diagnosis.
barplot(
  table(clin$patient.age_at_initial_pathologic_diagnosis),
  main = "Age distribution"
)

Observations: 1. Most patients are white/Caucasian. 2. The numbers of alive and deceased patients are roughly balanced. 3. The most common tumor site is the ovary.

Survival analysis

Perform survival analysis and check the association of age_at_initial_pathologic_diagnosis, race, tumor_tissue_site and lymphatic_invasion with survival

Survival analysis on lymphatic_invasion

# Single-covariate Cox proportional-hazards model: survival time and vital
# status against lymphatic-invasion status.
surv_lym <- coxph(
  Surv(times, patient.vital_status) ~ patient.lymphatic_invasion,
  data = clin
)

Survival analysis on age_at_initial_pathologic_diagnosis

# Convert age at diagnosis to numeric so the Cox model treats it as a
# continuous covariate. Going through as.character() first keeps the real
# ages even if the column is a factor: as.numeric() applied directly to a
# factor returns its internal level codes, not the values.
clin$patient.age_at_initial_pathologic_diagnosis <- as.numeric(
  as.character(clin$patient.age_at_initial_pathologic_diagnosis)
)
# Single-covariate Cox proportional-hazards model: survival against age.
surv_dt <- coxph(
  Surv(times, patient.vital_status) ~
    patient.age_at_initial_pathologic_diagnosis,
  data = clin
)

Survival analysis on race

# Recode race as integer codes numbered in order of first appearance.
# The code vector is sized from the data instead of being hard-coded to six
# entries, so it cannot fall out of step with the number of distinct values.
# as.character() makes the lookup go by name even if the column is a factor
# (indexing with a factor would use its level codes instead).
# A missing race stays NA after the lookup.
race_levels <- unique(as.character(clin$patient.race))
race <- seq_along(race_levels)
names(race) <- race_levels
clin$patient.race <- unname(race[as.character(clin$patient.race)])
# Single-covariate Cox proportional-hazards model on the numeric race code.
# NOTE(review): the numeric code imposes an arbitrary ordering on a nominal
# variable; fitting race as a factor would be more appropriate, but any code
# that reads a single coefficient from this model would need updating too.
surv_ra <- coxph(Surv(times, patient.vital_status) ~ patient.race, data = clin)

Survival analysis on tumor_tissue_site

# Recode tumor tissue site as integer codes numbered in order of first
# appearance. The code vector is sized from the data instead of being
# hard-coded to three entries, and as.character() makes the lookup go by
# name even if the column is a factor. A missing site stays NA.
site_levels <- unique(as.character(clin$patient.tumor_tissue_site))
ta <- seq_along(site_levels)
names(ta) <- site_levels
clin$patient.tumor_tissue_site <- unname(
  ta[as.character(clin$patient.tumor_tissue_site)]
)
# Single-covariate Cox proportional-hazards model on the numeric site code.
# NOTE(review): the numeric code imposes an arbitrary ordering on a nominal
# variable; fitting the site as a factor would be more appropriate, but any
# code that reads a single coefficient from this model would need updating.
surv_ta <- coxph(
  Surv(times, patient.vital_status) ~ patient.tumor_tissue_site,
  data = clin
)

Compare the contributions to survival rate across different metrics

# Wide left margin so the long metric names fit beside the horizontal bars.
par(mar = c(3, 14, 2, 2))
# Collect the p-value of the (single) covariate from each Cox model.
# The value is looked up by row and by the "Pr(>|z|)" column name rather than
# by the linear index 5 into a partially matched `$coef`, which only lands on
# the p-value when the coefficient table has exactly one row and five columns.
cox_p_values <- vapply(
  list(surv_dt, surv_lym, surv_ra, surv_ta),
  function(fit) summary(fit)$coefficients[1, "Pr(>|z|)"],
  numeric(1)
)
# One bar per clinical metric; a shorter bar means a smaller p-value.
barplot(
  cox_p_values,
  horiz = TRUE,
  names.arg = c(
    "age_at_initial_pathologic_diagnosis",
    "lymphatic_invasion",
    "race",
    "tumor_tissue_site"
  ),
  las = 2,
  main = "p-value of associations between survival rate and different clinical metrics"
)

# Kaplan-Meier curves by age group. The grouping is done on a copy of the
# data, so `clin` itself is left unchanged.
surplotdata <- clin
highAge <- 65
lowAge <- 50

# Bucket age: at or above highAge -> "High", at or below lowAge -> "Low",
# anything in between -> "Mid".
age_at_dx <- surplotdata[, "patient.age_at_initial_pathologic_diagnosis"]
surplotdata[, "patient.age_at_initial_pathologic_diagnosis"] <- ifelse(
  age_at_dx >= highAge,
  "High",
  ifelse(age_at_dx <= lowAge, "Low", "Mid")
)

# Fit one survival curve per age group, then plot the curves with a risk
# table and a p-value annotation.
age_km_fit <- survfit(
  Surv(times, patient.vital_status) ~
    patient.age_at_initial_pathologic_diagnosis,
  data = surplotdata
)
ggsurvplot(
  age_km_fit,
  data = surplotdata,
  risk.table = TRUE,
  pval = TRUE,
  break.time.by = 500,
  ggtheme = theme_minimal(),
  risk.table.y.text.col = TRUE,
  risk.table.y.text = FALSE
)

Conclusion

We can draw the following conclusions.
1. Age_at_initial_pathologic_diagnosis has the most significant association with survival in ovarian cancer. This indicates that early detection of the cancer is the key factor in curing the patient or extending the patient’s life in ovarian cancer.
2. Lymphatic invasion has a medium association with ovarian cancer survival, which is in accordance with recent academic publications on effective immune therapy for cancer patients.
3. Patient’s race has the second-highest association with the patient’s survival. However, since the data are not balanced across patients’ races, this observation could be biased.